diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index b3e292c23e781d675ad7e23512fe96672d4b8121..90a4a4452b9e5f3eba1c0b4c7ab88f5b91e03971 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -98,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) {
 bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
   cv::Mat im = input_im.clone();
-  if (!transforms_.Run(&im, &inputs_)) {
+  if (!transforms_.Run(&im, blob)) {
     return false;
   }
   return true;
diff --git a/docs/FAQ.md b/docs/FAQ.md
index 8da14f32e428f868f637a395223855aa66371bbf..b120ebd10ed791c65c3f65e611c5b45da2a9211f 100755
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -60,3 +60,9 @@
 ## 11. 每次训练新的模型,都需要重新下载预训练模型,怎样可以下载一次就搞定
 > 1.可以按照9的方式来解决这个问题
 > 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex'`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载
+
+## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决?
+> 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。
+> 2. 如果您的系统是Windows 7或者Windows Server 2012,可能是因为系统缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->“管理”打开服务器管理器,点击右上角“管理”选择“添加角色和功能”;点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”;等待安装完成后尝试再次运行PaddleX。
+> 3. 请检查目标机器上是否有其他的PaddleX程序或进程正在运行,如有请先退出,或重启机器后再次尝试。
+> 4. 请确认运行程序的用户是否有管理员权限,如非管理员权限,请尝试以管理员身份运行。
\ No newline at end of file
diff --git a/docs/apis/datasets/detection.md b/docs/apis/datasets/detection.md
index e660d7edfa9cfc41582902b92bcf0b0977766222..a32b6be5de6246ef6e28ebe376ded7e3faf82ff7 100755
--- a/docs/apis/datasets/detection.md
+++ b/docs/apis/datasets/detection.md
@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,

 > 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md)

-> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29)
+> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)

 > **参数**

@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
+> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
+> ```
+> add_negative_samples(image_dir)
+> ```
+> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
+
+> > **参数**
+
+> > > * **image_dir** (str): 背景图片所在的目录路径。
+
 ## CocoDetection类

 ```
 paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
+> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
+> ```
+> add_negative_samples(image_dir)
+> ```
+> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
+
+> > **参数**
+
+> > > * **image_dir** (str): 背景图片所在的目录路径。
+
 ## EasyDataDet类

 ```
 paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
@@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
 > > * **num_workers** (int|str):数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。
 > > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。
 > > * 
**parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 -> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 + + +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** +> > > * **image_dir** (str): 背景图片所在的目录路径。 diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md index 14bbcb1efd05b8afcc05027d5046942fa161e406..82b459d8281b1e9bc9d1f7abdd48fddb16473c21 100755 --- a/docs/apis/models/classification.md +++ b/docs/apis/models/classification.md @@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000) ### train 训练接口 ```python -train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None) +train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None) ``` > > **参数** @@ -30,6 +30,8 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.025。 +> > - **warmup_steps** (int): 默认优化器的warmup步数,学习率将在设定的步数内,从warmup_start_lr线性增长至设定的learning_rate,默认为0。 +> > - **warmup_start_lr**(float): 默认优化器的warmup起始学习率,默认为0.0。 > > - **lr_decay_epochs** (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 > > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 diff --git a/docs/apis/transforms/det_transforms.md b/docs/apis/transforms/det_transforms.md index 6d9c32815465ff02995dc2b1f80ff68d6bc08edb..3b182a1e4eeb7fdbe2d40c7530989d54919d8ec2 100755 --- a/docs/apis/transforms/det_transforms.md +++ b/docs/apis/transforms/det_transforms.md @@ -232,12 +232,12 @@ eval_transforms = transforms.Composed([ ``` -## ComposedYOLOTransforms类 +## ComposedYOLOv3Transforms类 ```python -paddlex.det.transforms.ComposedYOLOTransforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ``` -目标检测YOLOv3模型中已经组合好的数据处理流程,开发者可以直接使用ComposedYOLOTransforms,简化手动组合transforms的过程, 该类中已经包含了[MixupImage](#MixupImage)、[RandomDistort](#RandomDistort)、[RandomExpand](#RandomExpand)、[RandomCrop](#RandomCrop)、[RandomHorizontalFlip](#RandomHorizontalFlip)5种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 -ComposedYOLOTransforms共包括以下几个步骤: 
+目标检测YOLOv3模型中已经组合好的数据处理流程,开发者可以直接使用ComposedYOLOv3Transforms,简化手动组合transforms的过程, 该类中已经包含了[MixupImage](#MixupImage)、[RandomDistort](#RandomDistort)、[RandomExpand](#RandomExpand)、[RandomCrop](#RandomCrop)、[RandomHorizontalFlip](#RandomHorizontalFlip)5种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 +ComposedYOLOv3Transforms共包括以下几个步骤: > 训练阶段: > > 1. 在前mixup_epoch轮迭代中,使用MixupImage策略 > > 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 @@ -259,7 +259,7 @@ ComposedYOLOTransforms共包括以下几个步骤: ### 添加数据增强方式 ```python -ComposedYOLOTransforms.add_augmenters(augmenters) +ComposedYOLOv3Transforms.add_augmenters(augmenters) ``` > **参数** > * **augmenters**(list): 数据增强方式列表 @@ -268,8 +268,8 @@ ComposedYOLOTransforms.add_augmenters(augmenters) ``` import paddlex as pdx from paddlex.det import transforms -train_transforms = transforms.ComposedYOLOTransforms(mode='train', shape=[480, 480]) -eval_transforms = transforms.ComposedYOLOTransforms(mode='eval', shape=[480, 480]) +train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480]) +eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480]) # 添加数据增强 import imgaug.augmenters as iaa diff --git a/docs/appendix/model_zoo.md b/docs/appendix/model_zoo.md index c75f41729d60761f326284839f3a9dd2addeaddc..200847bc95aec5872879c3fbbe49b6f2ed0c741e 100644 --- a/docs/appendix/model_zoo.md +++ b/docs/appendix/model_zoo.md @@ -6,48 +6,56 @@ | 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) | | :----| :------- | :----------- | :--------- | :--------- | -| ResNet18| 46.9MB | 1.499 | 71.0 | 89.9 | -| ResNet34| 87.5MB | 2.272 | 74.6 | 92.1 | -| ResNet50| 102.7MB | 2.939 | 76.5 | 93.0 | -| ResNet101 |179.1MB | 5.314 | 77.6 | 93.6 | -| ResNet50_vd |102.8MB | 3.165 | 79.1 | 94.4 | -| ResNet101_vd| 179.2MB | 5.252 | 80.2 | 95.0 | -| ResNet50_vd_ssld |102.8MB | 3.165 | 82.4 | 96.1 | -| ResNet101_vd_ssld| 179.2MB | 5.252 | 83.7 | 96.7 | -| DarkNet53|166.9MB | 3.139 | 78.0 | 94.1 | -| MobileNetV1 | 16.0MB | 32.523 | 71.0 | 89.7 | -| MobileNetV2 | 14.0MB | 23.318 | 72.2 | 90.7 | -| MobileNetV3_large| 21.0MB | 19.308 | 75.3 | 93.2 | -| MobileNetV3_small | 12.0MB | 6.546 | 68.2 | 88.1 | -| MobileNetV3_large_ssld| 21.0MB | 19.308 | 79.0 | 94.5 | -| MobileNetV3_small_ssld | 12.0MB | 6.546 | 71.3 | 90.1 | -| Xception41 |92.4MB | 4.408 | 79.6 | 94.4 | -| Xception65 | 144.6MB | 6.464 | 80.3 | 94.5 | -| DenseNet121 | 32.8MB | 4.371 | 75.7 | 92.6 | -| DenseNet161|116.3MB | 8.863 | 78.6 | 94.1 | -| DenseNet201| 84.6MB | 8.173 | 77.6 | 93.7 | -| ShuffleNetV2 | 9.0MB | 10.941 | 68.8 | 88.5 | -| HRNet_W18 | 21.29MB | 7.368 (V100 GPU) | 76.9 | 93.4 | +| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 | +| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 | +| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 | +| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 | +| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 | +| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 | +| [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 | +| 
[ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 |
+| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 |
+| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 |
+| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 |
+| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 |
+| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 |
+| [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 |
+| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 |
+| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 |
+| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 |
+| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 |
+| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 |
+| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 |
+| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 |
+| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 |

 ## 目标检测模型

-> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
+> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100),表中符号`-`表示相关指标暂未测试。

 | 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) |
 |:-------|:-----------|:-------------|:----------|
-|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 |
-|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 |
-|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 |
-|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 |
-|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 |
-|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 |
-|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 |
-|FasterRCNN-HRNet_W18-FPN |115.5MB | 57.11 | 36 |
-|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 |
-|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 |
-|YOLOv3-MobileNetv3|94.6MB | - | 31.6 |
-| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 |
+|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
+|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
+|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
+|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 |
+|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 |
+|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 156.097 | 38.7 |
+|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 119.788 | 40.5 |
+|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 |
+|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 |
+|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 |
+|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 |
+| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 |

 ## 实例分割模型

 > 表中模型相关指标均为在MSCOCO数据集上测试得到。
+
+## 语义分割模型
+
+| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
+|:-------|:-----------|:-------------|:----------|
+|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
+|DeepLabv3+-Xception41|-| - | - |
+|DeepLabv3+-Xception65|-| - | - |
+|UNet|-| - | - |
+|HRNet_w18|-| - | - |
diff --git a/docs/appendix/parameters.md b/docs/appendix/parameters.md
index 732535dd08f2cc16e16e14fe2a45cd655706dc4c..716cdac92aa6504f1543cb91997f2f1fd89a3e13 100644
--- a/docs/appendix/parameters.md
+++ b/docs/appendix/parameters.md
@@ -23,3 +23,35 @@ Batch Size指模型在训练过程中,一次性处理的样本数量, 如若
 - [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train)
 - [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train)
 - [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2)
+
+## 关于lr_decay_epoch, warmup_steps等参数的说明
+
+在PaddleX或其它深度学习模型的训练过程中,经常见到lr_decay_epoch, warmup_steps, warmup_start_lr等参数设置,下面介绍这些参数的作用。
+
+这些参数都用于控制模型训练过程中学习率的变化方式。例如我们在训练时将learning_rate设为0.1,通常情况下,模型会以0.1的学习率一直训练下去,但为了取得更好的模型效果,我们往往不希望学习率一直保持不变。
+
+### warmup_steps和warmup_start_lr
+
+我们在训练模型时,一般都会使用预训练模型,例如检测模型在训练时会使用backbone在ImageNet数据集上的预训练权重。但由于自己的数据与ImageNet数据集存在较大差异,训练初期可能会因梯度过大而导致训练出现问题,因此可以在刚开始训练时,让学习率从一个较小的值慢慢增长到设定的学习率。`warmup_steps`和`warmup_start_lr`就起这个作用:模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`步内线性增长到设定的learning_rate。
+
+### lr_decay_epochs和lr_decay_gamma
+
+`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次衰减后的学习率为衰减前学习率乘以lr_decay_gamma。
+
+### Notice
+
+在PaddleX中,warmup需要在第一次学习率衰减前结束,因此需要满足下面的公式
+```
+warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch
+```
+其中`num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`。
+
+> 因此如若在训练时PaddleX提示`warmup_steps should be less than xxx`,即可根据上述公式调整`lr_decay_epochs`或`warmup_steps`,使得两个参数满足上面的条件。
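+
+下面是一段示意性的检查代码(其中训练集样本数、batch size等均为假设的示例值),可在调用train接口前用来自行验证上述约束:
+
+```
+# 示意代码:检查warmup_steps与lr_decay_epochs是否满足约束(以下数值均为示例)
+num_samples_in_train_dataset = 1000  # 训练集样本数(示例值)
+train_batch_size = 8                 # 训练batch size(示例值)
+lr_decay_epochs = [6, 8, 10]
+warmup_steps = 400
+
+num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size
+assert warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch, \
+    "warmup_steps需不大于lr_decay_epochs[0] * num_steps_each_epoch,请调整这两个参数"
+```
+
+各模型train接口文档:
+
+> - 图像分类模型 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train)
+> - FasterRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn)
+> - YOLOv3 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3)
+> - MaskRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn)
+> - DeepLab 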
[train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p) +> - UNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet) +> - HRNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet) diff --git "a/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" "b/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" deleted file mode 100644 index 0eea21e4388200485959746b4135f58d45711883..0000000000000000000000000000000000000000 Binary files "a/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" and /dev/null differ diff --git "a/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" "b/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" deleted file mode 100644 index 17e25a531e68c7655e46ad52d6c607e62bdcc6d6..0000000000000000000000000000000000000000 Binary files "a/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" and /dev/null differ diff --git a/docs/index.rst b/docs/index.rst index 0f876e6337ce4ea181b7558a5654808774f50572..1d8d9c0c124dd0015e7b90634fcb1b1551db87f5 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,6 +26,7 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习 cv_solutions.md apis/index.rst paddlex_gui/index.rst + tuning_strategy/index.rst update.md FAQ.md appendix/index.rst diff --git a/docs/tuning_strategy/detection/index.rst b/docs/tuning_strategy/detection/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..5457adeeea053df4de9332bd4df61cd450830f96 --- /dev/null +++ b/docs/tuning_strategy/detection/index.rst @@ -0,0 +1,10 @@ +目标检测 +============================ + +PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。 + +.. 
toctree::
+   :maxdepth: 1
+
+   negatives_training.md
+
diff --git a/docs/tuning_strategy/detection/negatives_training.md b/docs/tuning_strategy/detection/negatives_training.md
new file mode 100644
index 0000000000000000000000000000000000000000..d3590e3222018faf90462935588a785b8fae4e7f
--- /dev/null
+++ b/docs/tuning_strategy/detection/negatives_training.md
@@ -0,0 +1,101 @@
+# 通过负样本学习降低误检率
+
+## 应用场景
+
+在背景和目标相似的场景下,模型容易把背景误检成目标。此时可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的背景图片加入训练。
+
+## 效果对比
+
+* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**。
+* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**。
+
+表1 违禁品验证集上**框级别精度**对比
+
+||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)|
+|:---|:---|:---|
+|基准模型 | 45.8% | 83% |
+|通过负样本学习后的模型 | 49.4% | 83.1% |
+
+表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
+
+||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
+|:---|:--------------------|:------------------------|
+|基准模型 | 98.97% | 55.27% |
+|通过负样本学习后的模型 | 97.75% | 5.59% |
+
+【名词解释】
+
+ * 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。
+
+ * 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。
+
+
+## 使用方法
+
+在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下:
+
+```
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+from paddlex.det import transforms
+import paddlex as pdx
+
+# 定义训练和验证时的transforms
+train_transforms = transforms.ComposedRCNNTransforms(
+    mode='train', min_max_size=[600, 1000])
+eval_transforms = transforms.ComposedRCNNTransforms(
+    mode='eval', min_max_size=[600, 1000])
+
+# 定义训练所用的数据集
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='jinnan2_round1_train_20190305/restricted/',
+    ann_file='jinnan2_round1_train_20190305/train.json',
+    transforms=train_transforms,
+    shuffle=True,
+    num_workers=2)
+# 训练集中加入无目标背景图片
+train_dataset.add_negative_samples(
+    'jinnan2_round1_train_20190305/normal_train_back/')
+
+# 定义验证所用的数据集
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='jinnan2_round1_train_20190305/restricted/',
+    ann_file='jinnan2_round1_train_20190305/val.json',
+    transforms=eval_transforms,
+    num_workers=2)
+
+# 初始化模型,并进行训练
+model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
+model.train(
+    num_epochs=17,
+    train_dataset=train_dataset,
+    eval_dataset=eval_dataset,
+    train_batch_size=8,
+    learning_rate=0.01,
+    lr_decay_epochs=[13, 16],
+    save_dir='./output')
+```
+
+## 实验细则
+
+(1) 数据集
+
+我们使用X光违禁品数据集对通过负样本学习降低误检率策略的有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。
+
+* 检测铁壳打火机、黑钉打火机、刀具、电源和电池、剪刀5种违禁品。
+
+* 训练集有883张违禁品图片,验证集有98张违禁品图片。
+
+* 无违禁品的X光图片有2540张。
+
+(2) 基准模型
+
+使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。
+
+(3) 通过负样本学习后的模型
+
+把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集上进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集上进行测试,挑选出被误检的图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。
+
+通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集上的误检率仅有5.59%,与基准模型相比,**误检率降低了49.68%**。
+
+此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。
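+
+作为补充,下面是一个在VOC格式数据集上使用该策略的示意代码(数据集路径均为假设值,仅演示`add_negative_samples`在VOCDetection上的调用方式):
+
+```
+import paddlex as pdx
+from paddlex.det import transforms
+
+train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
+
+# VOC格式训练集(以下路径均为示例值)
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='my_voc_dataset',
+    file_list='my_voc_dataset/train_list.txt',
+    label_list='my_voc_dataset/labels.txt',
+    transforms=train_transforms)
+# 将无目标真值的背景图片加入训练集
+train_dataset.add_negative_samples('my_voc_dataset/background_images/')
+```
diff --git a/docs/tuning_strategy/index.rst b/docs/tuning_strategy/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f9d5cd50f914609f864135dfba922f857f771dbf
--- /dev/null
+++ b/docs/tuning_strategy/index.rst
@@ -0,0 +1,7 @@
+PaddleX调优策略介绍
+============================
+
+.. 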
toctree:: + :maxdepth: 2 + + detection/index.rst diff --git a/new_tutorials/train/classification/mobilenetv2.py b/new_tutorials/train/classification/mobilenetv2.py index 3f637125b760de6d992d6a062e4d456bf5038426..9a075526a3cbb7e560c133f08faef68ea5a07121 100644 --- a/new_tutorials/train/classification/mobilenetv2.py +++ b/new_tutorials/train/classification/mobilenetv2.py @@ -10,18 +10,12 @@ veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' pdx.utils.download_and_decompress(veg_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose([ - transforms.RandomCrop(crop_size=224), - transforms.RandomHorizontalFlip(), - transforms.Normalize() -]) -eval_transforms = transforms.Compose([ - transforms.ResizeByShort(short_size=256), - transforms.CenterCrop(crop_size=224), - transforms.Normalize() -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms +train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) +eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet train_dataset = pdx.datasets.ImageNet( data_dir='vegetables_cls', file_list='vegetables_cls/train_list.txt', @@ -39,6 +33,8 @@ eval_dataset = pdx.datasets.ImageNet( # VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001 # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50 model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels)) model.train( num_epochs=10, diff --git a/new_tutorials/train/classification/resnet50.py b/new_tutorials/train/classification/resnet50.py index 2e5a9b4820c7e66a83abaca0b13e057b15ceb830..bf56a605f1c3376057c1ab9283fa1251491b2750 100644 --- a/new_tutorials/train/classification/resnet50.py +++ b/new_tutorials/train/classification/resnet50.py @@ -11,16 +11,12 @@ veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' pdx.utils.download_and_decompress(veg_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose( - [transforms.RandomCrop(crop_size=224), - transforms.Normalize()]) -eval_transforms = transforms.Compose([ - transforms.ResizeByShort(short_size=256), - transforms.CenterCrop(crop_size=224), - transforms.Normalize() -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms +train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) +eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet train_dataset = pdx.datasets.ImageNet( data_dir='vegetables_cls', file_list='vegetables_cls/train_list.txt', @@ -47,6 +43,8 @@ optimizer = fluid.optimizer.Momentum( # VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001 # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50 model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels)) model.train( num_epochs=10, diff --git a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py index 
cbe6dabe535b5972418349ac31576b344652e69d..a64b711c3af48cb85cfd8a82938785ca386a99ec 100644 --- a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py +++ b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py @@ -10,20 +10,12 @@ insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz' pdx.utils.download_and_decompress(insect_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.Normalize(), - transforms.ResizeByShort(short_size=800, max_size=1333), - transforms.Padding(coarsest_stride=32) -]) - -eval_transforms = transforms.Compose([ - transforms.Normalize(), - transforms.ResizeByShort(short_size=800, max_size=1333), - transforms.Padding(coarsest_stride=32), -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms +train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) +eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection train_dataset = pdx.datasets.VOCDetection( data_dir='insect_det', file_list='insect_det/train_list.txt', @@ -42,6 +34,8 @@ eval_dataset = pdx.datasets.VOCDetection( # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP # num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn num_classes = len(train_dataset.labels) + 1 model = pdx.det.FasterRCNN(num_classes=num_classes) model.train( diff --git a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py index 15a6b840528fe7948c80f4cf605498cf55b5c918..f2ebf6e20f18054bf16452eb6e60b9ea24f20748 100644 --- a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py +++ b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py @@ -10,20 +10,12 @@ xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_de pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.Normalize(), - transforms.ResizeByShort(short_size=800, max_size=1333), - transforms.Padding(coarsest_stride=32) -]) - -eval_transforms = transforms.Compose([ - transforms.Normalize(), - transforms.ResizeByShort(short_size=800, max_size=1333), - transforms.Padding(coarsest_stride=32) -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms +train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) +eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection train_dataset = pdx.datasets.CocoDetection( data_dir='xiaoduxiong_ins_det/JPEGImages', ann_file='xiaoduxiong_ins_det/train.json', @@ -40,6 +32,8 @@ eval_dataset = pdx.datasets.CocoDetection( # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP # num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn num_classes = len(train_dataset.labels) + 1 model = pdx.det.MaskRCNN(num_classes=num_classes) model.train( diff --git a/new_tutorials/train/detection/yolov3_darknet53.py 
b/new_tutorials/train/detection/yolov3_darknet53.py
index c38656b04e9a35cd033dc583811c58aa8baafba2..8027a506458aac94de82a915aa8b058d71ba97f7 100644
--- a/new_tutorials/train/detection/yolov3_darknet53.py
+++ b/new_tutorials/train/detection/yolov3_darknet53.py
@@ -10,22 +10,12 @@
 insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
 pdx.utils.download_and_decompress(insect_dataset, path='./')

 # 定义训练和验证时的transforms
-train_transforms = transforms.Compose([
-    transforms.MixupImage(mixup_epoch=250),
-    transforms.RandomDistort(),
-    transforms.RandomExpand(),
-    transforms.RandomCrop(),
-    transforms.Resize(target_size=608, interp='RANDOM'),
-    transforms.RandomHorizontalFlip(),
-    transforms.Normalize(),
-])
-
-eval_transforms = transforms.Compose([
-    transforms.Resize(target_size=608, interp='CUBIC'),
-    transforms.Normalize(),
-])
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
+train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
+eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[608, 608])

 # 定义训练和验证所用的数据集
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
 train_dataset = pdx.datasets.VOCDetection(
     data_dir='insect_det',
     file_list='insect_det/train_list.txt',
@@ -43,6 +33,8 @@ eval_dataset = pdx.datasets.VOCDetection(
 # VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001
 # 浏览器打开 https://0.0.0.0:8001即可
 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
+
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
 num_classes = len(train_dataset.labels)
 model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
 model.train(
diff --git a/new_tutorials/train/segmentation/deeplabv3p.py b/new_tutorials/train/segmentation/deeplabv3p.py
index 346a229a358a76830112acfd596740c070822874..cb18fcfad65331d02b04abe3c3a76fa0356fb5b8 100644
--- a/new_tutorials/train/segmentation/deeplabv3p.py
+++ b/new_tutorials/train/segmentation/deeplabv3p.py
@@ -10,17 +10,16 @@
 optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
 pdx.utils.download_and_decompress(optic_dataset, path='./')

 # 定义训练和验证时的transforms
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(),
-    transforms.Resize(target_size=512),
-    transforms.RandomPaddingCrop(crop_size=500),
-    transforms.Normalize()
-])
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
+train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')

-eval_transforms = transforms.Compose(
-    [transforms.Resize(512), transforms.Normalize()])
+train_transforms.add_augmenters([
+    transforms.RandomRotate()
+])

 # 定义训练和验证所用的数据集
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
 train_dataset = pdx.datasets.SegDataset(
     data_dir='optic_disc_seg',
     file_list='optic_disc_seg/train_list.txt',
@@ -38,6 +37,8 @@ eval_dataset = pdx.datasets.SegDataset(
 # VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001
 # 浏览器打开 https://0.0.0.0:8001即可
 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
+
+# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
 num_classes = len(train_dataset.labels)
 model = pdx.seg.DeepLabv3p(num_classes=num_classes)
 model.train(
diff --git 
a/new_tutorials/train/segmentation/hrnet.py b/new_tutorials/train/segmentation/hrnet.py index f887b78c3ae16ae66235f1965ada8bd2355d62c6..98fdd1b925bd4707001fdad56b3ffdc6bb2b58ae 100644 --- a/new_tutorials/train/segmentation/hrnet.py +++ b/new_tutorials/train/segmentation/hrnet.py @@ -10,17 +10,12 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' pdx.utils.download_and_decompress(optic_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose([ - transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(), - transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize() -]) - -eval_transforms = transforms.Compose([ - transforms.ResizeByLong(long_size=512), - transforms.Padding(target_size=512), transforms.Normalize() -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset train_dataset = pdx.datasets.SegDataset( data_dir='optic_disc_seg', file_list='optic_disc_seg/train_list.txt', @@ -38,6 +33,8 @@ eval_dataset = pdx.datasets.SegDataset( # VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet num_classes = len(train_dataset.labels) model = pdx.seg.HRNet(num_classes=num_classes) model.train( diff --git a/new_tutorials/train/segmentation/unet.py b/new_tutorials/train/segmentation/unet.py index a683af98322eacb9d0775b3a5256d900f5743bb2..ddf4f7991a690b0d0d506967df0c140f60945e85 100644 --- a/new_tutorials/train/segmentation/unet.py +++ b/new_tutorials/train/segmentation/unet.py @@ -10,20 +10,12 @@ optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' pdx.utils.download_and_decompress(optic_dataset, path='./') # 定义训练和验证时的transforms -train_transforms = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.ResizeRangeScaling(), - transforms.RandomPaddingCrop(crop_size=512), - transforms.Normalize() -]) - -eval_transforms = transforms.Compose([ - transforms.ResizeByLong(long_size=512), - transforms.Padding(target_size=512), - transforms.Normalize() -]) +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') # 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset train_dataset = pdx.datasets.SegDataset( data_dir='optic_disc_seg', file_list='optic_disc_seg/train_list.txt', @@ -41,6 +33,8 @@ eval_dataset = pdx.datasets.SegDataset( # VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 # 浏览器打开 https://0.0.0.0:8001即可 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet num_classes = len(train_dataset.labels) model = pdx.seg.UNet(num_classes=num_classes) model.train( diff --git a/paddlex/__init__.py b/paddlex/__init__.py index d1656161a0d764c0a7fbd125f246d0e43125bcda..b80363f2e6adfdbd6ce712cfec486540753abbb7 100644 --- 
a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -53,4 +53,4 @@ log_level = 2 from . import interpret -__version__ = '1.0.5' +__version__ = '1.0.6' diff --git a/paddlex/cls.py b/paddlex/cls.py index 0dce289d7ee77c9559a4fce2104cca8786b81f52..90c5eefce512c966a04975ebfe6457613012c872 100644 --- a/paddlex/cls.py +++ b/paddlex/cls.py @@ -37,5 +37,6 @@ DenseNet161 = cv.models.DenseNet161 DenseNet201 = cv.models.DenseNet201 ShuffleNetV2 = cv.models.ShuffleNetV2 HRNet_W18 = cv.models.HRNet_W18 +AlexNet = cv.models.AlexNet transforms = cv.transforms.cls_transforms diff --git a/paddlex/cv/datasets/coco.py b/paddlex/cv/datasets/coco.py index ff7c2b2d2438fb88d359c94f9ede65d900d9216e..97e791be5ed3cac1656fba4429d90f1653bfe1be 100644 --- a/paddlex/cv/datasets/coco.py +++ b/paddlex/cv/datasets/coco.py @@ -128,7 +128,6 @@ class CocoDetection(VOCDetection): coco_rec = (im_info, label_info) self.file_list.append([im_fname, coco_rec]) - if not len(self.file_list) > 0: raise Exception('not found any coco record in %s' % (ann_file)) logging.info("{} samples in file {}".format( diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py index 45335084a9f8ddd9b850b52f3a6db9fe6886a153..9b2e8528c52d5f2ecd6a041bbf7e86f095ea35ac 100644 --- a/paddlex/cv/datasets/voc.py +++ b/paddlex/cv/datasets/voc.py @@ -14,6 +14,7 @@ from __future__ import absolute_import import copy +import os import os.path as osp import random import numpy as np @@ -170,6 +171,44 @@ class VOCDetection(Dataset): self.coco_gt.dataset = annotations self.coco_gt.createIndex() + def add_negative_samples(self, image_dir): + import cv2 + if not osp.exists(image_dir): + raise Exception("{} background images directory does not exist.". + format(image_dir)) + image_list = os.listdir(image_dir) + max_img_id = max(self.coco_gt.getImgIds()) + for image in image_list: + if not is_pic(image): + continue + # False ground truth + gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32) + gt_class = np.array([[0]], dtype=np.int32) + gt_score = np.ones((1, 1), dtype=np.float32) + is_crowd = np.array([[0]], dtype=np.int32) + difficult = np.zeros((1, 1), dtype=np.int32) + gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]] + + max_img_id += 1 + im_fname = osp.join(image_dir, image) + img_data = cv2.imread(im_fname) + im_h, im_w, im_c = img_data.shape + im_info = { + 'im_id': np.array([max_img_id]).astype('int32'), + 'image_shape': np.array([im_h, im_w]).astype('int32'), + } + label_info = { + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_score': gt_score, + 'difficult': difficult, + 'gt_poly': gt_poly + } + coco_rec = (im_info, label_info) + self.file_list.append([im_fname, coco_rec]) + self.num_samples = len(self.file_list) + def iterator(self): self._epoch += 1 self._pos = 0 diff --git a/paddlex/cv/models/__init__.py b/paddlex/cv/models/__init__.py index bff0faae2087da72a66de24b93b1700599d6d29c..1c7e4b35bc7387c3f5c536e74edc0feafa1811d9 100644 --- a/paddlex/cv/models/__init__.py +++ b/paddlex/cv/models/__init__.py @@ -35,6 +35,7 @@ from .classifier import DenseNet161 from .classifier import DenseNet201 from .classifier import ShuffleNetV2 from .classifier import HRNet_W18 +from .classifier import AlexNet from .base import BaseAPI from .yolo_v3 import YOLOv3 from .faster_rcnn import FasterRCNN diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py index 9e4d93ade1df2e1b92312ae0e81b4d203268b0f1..d15459c0bc318207b5bcf9593dfaaf676437fe27 100644 --- a/paddlex/cv/models/base.py +++ b/paddlex/cv/models/base.py @@ 
-1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import paddle.fluid as fluid @@ -245,8 +245,8 @@ class BaseAPI: logging.info( "Load pretrain weights from {}.".format(pretrain_weights), use_color=True) - paddlex.utils.utils.load_pretrain_weights( - self.exe, self.train_prog, pretrain_weights, fuse_bn) + paddlex.utils.utils.load_pretrain_weights(self.exe, self.train_prog, + pretrain_weights, fuse_bn) # 进行裁剪 if sensitivities_file is not None: import paddleslim @@ -286,6 +286,7 @@ class BaseAPI: info['_Attributes']['num_classes'] = self.num_classes info['_Attributes']['labels'] = self.labels + info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape try: primary_metric_key = list(self.eval_metrics.keys())[0] primary_metric_value = float(self.eval_metrics[primary_metric_key]) @@ -349,9 +350,7 @@ class BaseAPI: logging.info("Model saved in {}.".format(save_dir)) def export_inference_model(self, save_dir): - test_input_names = [ - var.name for var in list(self.test_inputs.values()) - ] + test_input_names = [var.name for var in list(self.test_inputs.values())] test_outputs = list(self.test_outputs.values()) if self.__class__.__name__ == 'MaskRCNN': from paddlex.utils.save import save_mask_inference_model @@ -388,8 +387,7 @@ class BaseAPI: # 模型保存成功的标志 open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model for inference deploy saved in {}.".format( - save_dir)) + logging.info("Model for inference deploy saved in {}.".format(save_dir)) def train_loop(self, num_epochs, @@ -513,13 +511,11 @@ class BaseAPI: eta = ((num_epochs - i) * total_num_steps - step - 1 ) * avg_step_time if time_eval_one_epoch is not None: - eval_eta = ( - total_eval_times - i // save_interval_epochs - ) * time_eval_one_epoch + eval_eta = (total_eval_times - i // save_interval_epochs + ) * time_eval_one_epoch else: - eval_eta = ( - total_eval_times - i // save_interval_epochs - ) * total_num_steps_eval * avg_step_time + eval_eta = (total_eval_times - i // save_interval_epochs + ) * total_num_steps_eval * avg_step_time eta_str = seconds_to_hms(eta + eval_eta) logging.info( @@ -536,6 +532,7 @@ class BaseAPI: epoch_start_time = time.time() # 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存 + self.completed_epochs += 1 eval_epoch_start_time = time.time() if (i + 1) % save_interval_epochs == 0 or i == 
num_epochs - 1: current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1)) @@ -549,7 +546,6 @@ class BaseAPI: return_details=True) logging.info('[EVAL] Finished, Epoch={}, {} .'.format( i + 1, dict2str(self.eval_metrics))) - self.completed_epochs += 1 # 保存最优模型 best_accuracy_key = list(self.eval_metrics.keys())[0] current_accuracy = self.eval_metrics[best_accuracy_key] diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py index ab746ddf7ef7af2baf3da951879f2fdab5b4b8e4..17a307d8bdeed77467535bec1216cc9b97bd70e3 100644 --- a/paddlex/cv/models/classifier.py +++ b/paddlex/cv/models/classifier.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import numpy as np @@ -48,12 +48,13 @@ class BaseClassifier(BaseAPI): self.fixed_input_shape = None def build_net(self, mode='train'): + if self.__class__.__name__ == "AlexNet": + assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. 
model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])" if self.fixed_input_shape is not None: input_shape = [ None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] ] - image = fluid.data( - dtype='float32', shape=input_shape, name='image') + image = fluid.data(dtype='float32', shape=input_shape, name='image') else: image = fluid.data( dtype='float32', shape=[None, 3, None, None], name='image') @@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI): del outputs['loss'] return inputs, outputs - def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma, + def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr, + lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [ @@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI): ] lr_decay = fluid.layers.piecewise_decay( boundaries=boundaries, values=values) + if warmup_steps > 0: + if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". + format(lr_decay_epochs[0] * num_steps_each_epoch, + warmup_steps // num_steps_each_epoch)) + + lr_decay = fluid.layers.linear_lr_warmup( + learning_rate=lr_decay, + warmup_steps=warmup_steps, + start_lr=warmup_start_lr, + end_lr=learning_rate) optimizer = fluid.optimizer.Momentum( lr_decay, momentum=0.9, @@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI): pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, + warmup_steps=0, + warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, @@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI): optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的初始学习率。默认为0.025。 + warmup_steps(int): 学习率从warmup_start_lr上升至设定的learning_rate,所需的步数,默认为0 + warmup_start_lr(float): 学习率在warmup阶段时的起始值,默认为0.0 lr_decay_epochs (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。 use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。 @@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI): num_steps_each_epoch = train_dataset.num_samples // train_batch_size optimizer = self.default_optimizer( learning_rate=learning_rate, + warmup_steps=warmup_steps, + warmup_start_lr=warmup_start_lr, lr_decay_epochs=lr_decay_epochs, lr_decay_gamma=lr_decay_gamma, num_steps_each_epoch=num_steps_each_epoch) @@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI): tuple (metrics, eval_details): 当return_details为True时,增加返回dict, 包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') data_generator = eval_dataset.generator( batch_size=batch_size, drop_last=False) k = min(5, self.num_classes) @@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI): self.test_prog).with_data_parallel( share_vars_from=self.parallel_train_prog) batch_size_each_gpu = self._get_single_card_bs(batch_size) - logging.info( - "Start to evaluating(total_samples={}, 
total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') @@ -405,3 +429,10 @@ class HRNet_W18(BaseClassifier): def __init__(self, num_classes=1000): super(HRNet_W18, self).__init__( model_name='HRNet_W18', num_classes=num_classes) + + +class AlexNet(BaseClassifier): + def __init__(self, num_classes=1000, input_shape=None): + super(AlexNet, self).__init__( + model_name='AlexNet', num_classes=num_classes) + self.fixed_input_shape = input_shape diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index 1d65f093825e3a11d1cbbb7fc4e81b03e5f8b532..e548439a7ed81fd5758395244d26926d3c8010fe 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import os.path as osp @@ -339,7 +339,8 @@ class DeepLabv3p(BaseAPI): for d in data: padding_label = np.zeros( (1, im_h, im_w)).astype('int64') + self.ignore_index - padding_label[:, :im_h, :im_w] = d[1] + _, label_h, label_w = d[1].shape + padding_label[:, :label_h, :label_w] = d[1] labels.append(padding_label) labels = np.array(labels) diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py index 85ac866709f3a0efaa461a226de215e65a60f6f0..45279bfc6014329ced089d39072221ceaf8dd683 100644 --- a/paddlex/cv/models/faster_rcnn.py +++ b/paddlex/cv/models/faster_rcnn.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -138,8 +138,16 @@ class FasterRCNN(BaseAPI): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_steps should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". + format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -283,8 +291,7 @@ class FasterRCNN(BaseAPI): eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -303,14 +310,12 @@ class FasterRCNN(BaseAPI): logging.warning( "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1." ) - dataset = eval_dataset.generator( - batch_size=batch_size, drop_last=False) + dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False) total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) results = list() - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') im_infos = np.array([d[1] for d in data]).astype('float32') diff --git a/paddlex/cv/models/hrnet.py b/paddlex/cv/models/hrnet.py index 6623a898f3f6af30f2391258d358a0454cc02ecd..3a000feee5fe6a2b6a93662e1dc65754d6e1cd68 100644 --- a/paddlex/cv/models/hrnet.py +++ b/paddlex/cv/models/hrnet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import paddle.fluid as fluid @@ -77,6 +77,7 @@ class HRNet(DeepLabv3p): self.class_weight = class_weight self.ignore_index = ignore_index self.labels = None + self.fixed_input_shape = None def build_net(self, mode='train'): model = paddlex.cv.nets.segmentation.HRNet( @@ -86,7 +87,8 @@ class HRNet(DeepLabv3p): use_bce_loss=self.use_bce_loss, use_dice_loss=self.use_dice_loss, class_weight=self.class_weight, - ignore_index=self.ignore_index) + ignore_index=self.ignore_index, + fixed_input_shape=self.fixed_input_shape) inputs = model.generate_inputs() model_out = model.build_net(inputs) outputs = OrderedDict() @@ -166,6 +168,6 @@ class HRNet(DeepLabv3p): return super(HRNet, self).train( num_epochs, train_dataset, train_batch_size, eval_dataset, save_interval_epochs, log_interval_steps, save_dir, - pretrain_weights, optimizer, learning_rate, lr_decay_power, - use_vdl, sensitivities_file, eval_metric_loss, early_stop, + pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl, + sensitivities_file, eval_metric_loss, early_stop, early_stop_patience, resume_checkpoint) diff --git a/paddlex/cv/models/load_model.py b/paddlex/cv/models/load_model.py index 738f4ff00452d278b3988d9303bb15b0d8885979..87b30ac47c206f0b3723ffcf353d95078feeb892 100644 --- a/paddlex/cv/models/load_model.py +++ b/paddlex/cv/models/load_model.py @@ -41,7 +41,16 @@ def load_model(model_dir, fixed_input_shape=None): if 'model_name' in info['_init_params']: del info['_init_params']['model_name'] model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params']) + model.fixed_input_shape = fixed_input_shape + if '_Attributes' in info: + if 'fixed_input_shape' in info['_Attributes']: + fixed_input_shape = info['_Attributes']['fixed_input_shape'] + if fixed_input_shape is not None: + logging.info("Model already has fixed_input_shape with {}". 
+ format(fixed_input_shape)) + model.fixed_input_shape = fixed_input_shape + if status == "Normal" or \ status == "Prune" or status == "fluid.save": startup_prog = fluid.Program() @@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None): model.model_type, info['Transforms'], info['BatchTransforms']) model.eval_transforms = copy.deepcopy(model.test_transforms) else: - model.test_transforms = build_transforms( - model.model_type, info['Transforms'], to_rgb) + model.test_transforms = build_transforms(model.model_type, + info['Transforms'], to_rgb) model.eval_transforms = copy.deepcopy(model.test_transforms) if '_Attributes' in info: @@ -107,20 +116,7 @@ def fix_input_shape(info, fixed_input_shape=None): resize = {'ResizeByShort': {}} padding = {'Padding': {}} if info['_Attributes']['model_type'] == 'classifier': - crop_size = 0 - for transform in info['Transforms']: - if 'CenterCrop' in transform: - crop_size = transform['CenterCrop']['crop_size'] - break - assert crop_size == fixed_input_shape[ - 0], "fixed_input_shape must == CenterCrop:crop_size:{}".format( - crop_size) - assert crop_size == fixed_input_shape[ - 1], "fixed_input_shape must == CenterCrop:crop_size:{}".format( - crop_size) - if crop_size == 0: - logging.warning( - "fixed_input_shape must == input shape when trainning") + pass else: resize['ResizeByShort']['short_size'] = min(fixed_input_shape) resize['ResizeByShort']['max_size'] = max(fixed_input_shape) diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index d370a828870f2b5fafb543b25099c90618e17e84..26d5e5cb4edc58be0fffaf6d778058c5846c1929 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
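The load_model.py change above makes a shape stored with an exported model take precedence over the `fixed_input_shape` argument passed by the caller. A minimal standalone sketch of that precedence rule, assuming the `info` dict layout shown in the hunk (`resolve_fixed_input_shape` is a hypothetical helper, not library API):

```python
def resolve_fixed_input_shape(info, requested_shape=None):
    """Return the input shape a loaded model should actually use."""
    shape = requested_shape
    stored = info.get('_Attributes', {}).get('fixed_input_shape')
    if stored is not None:
        # The model was exported with a fixed shape; it wins over the argument.
        shape = stored
    return shape


# A model saved with fixed_input_shape=[608, 608] keeps that shape even if
# load_model() is later called with fixed_input_shape=[512, 512].
assert resolve_fixed_input_shape(
    {'_Attributes': {'fixed_input_shape': [608, 608]}},
    requested_shape=[512, 512]) == [608, 608]
```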
from __future__ import absolute_import import math @@ -97,8 +97,16 @@ class MaskRCNN(FasterRCNN): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_step should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". + format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -245,8 +253,7 @@ class MaskRCNN(FasterRCNN): 预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、 预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -267,9 +274,8 @@ class MaskRCNN(FasterRCNN): total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) results = list() - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') @@ -311,8 +317,7 @@ class MaskRCNN(FasterRCNN): zip(['bbox_map', 'segm_map'], [ap_stats[0][1], ap_stats[1][1]])) else: - metrics = OrderedDict( - zip(['bbox_map', 'segm_map'], [0.0, 0.0])) + metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0])) elif metric == 'COCO': if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], np.ndarray): diff --git a/paddlex/cv/models/slim/prune.py b/paddlex/cv/models/slim/prune.py index 41bc2d62e152aaa362490e527655bab4bbcca0f6..ad4dec23b8e3b29eda30fa873f4baa625a004884 100644 --- a/paddlex/cv/models/slim/prune.py +++ b/paddlex/cv/models/slim/prune.py @@ -115,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False): Returns: paddle.fluid.Program: 裁剪后的Program。 """ + prog_var_shape_dict = {} + for var in program.list_vars(): + try: + prog_var_shape_dict[var.name] = var.shape + except Exception: + pass + index = 0 + for param, ratio in zip(prune_names, prune_ratios): + origin_num = prog_var_shape_dict[param][0] + pruned_num = int(round(origin_num * ratio)) + while origin_num == pruned_num: + ratio -= 0.1 + pruned_num = int(round(origin_num * (ratio))) + prune_ratios[index] = ratio + index += 1 scope = fluid.global_scope() pruner = Pruner() program, _, _ = pruner.prune( @@ -266,8 +281,8 @@ def get_params_ratios(sensitivities_file, eval_metric_loss=0.05): if not osp.exists(sensitivities_file): raise Exception('The sensitivities file is not exists!') sensitivitives = paddleslim.prune.load_sensitivities(sensitivities_file) - params_ratios = paddleslim.prune.get_ratios_by_loss(sensitivitives, - 
eval_metric_loss) + params_ratios = paddleslim.prune.get_ratios_by_loss( + sensitivitives, eval_metric_loss) return params_ratios @@ -286,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05): """ prune_params_ratios = get_params_ratios(sensitivities_file, eval_metric_loss) + prog_var_shape_dict = {} + for var in program.list_vars(): + try: + prog_var_shape_dict[var.name] = var.shape + except Exception: + pass + for param, ratio in prune_params_ratios.items(): + origin_num = prog_var_shape_dict[param][0] + pruned_num = int(round(origin_num * ratio)) + while origin_num == pruned_num: + ratio -= 0.1 + pruned_num = int(round(origin_num * (ratio))) + prune_params_ratios[param] = ratio prune_program = channel_prune( program, list(prune_params_ratios.keys()), diff --git a/paddlex/cv/models/slim/prune_config.py b/paddlex/cv/models/slim/prune_config.py index 34675a53b8bda539f3aa5cfc0adf374f449df303..49430e9bfb1dcc47fb93aa9fc7d05ceb21e2b9e8 100644 --- a/paddlex/cv/models/slim/prune_config.py +++ b/paddlex/cv/models/slim/prune_config.py @@ -142,13 +142,16 @@ def get_prune_params(model): program = model.test_prog if model_type.startswith('ResNet') or \ model_type.startswith('DenseNet') or \ - model_type.startswith('DarkNet'): + model_type.startswith('DarkNet') or \ + model_type.startswith('AlexNet'): for block in program.blocks: for param in block.all_parameters(): pd_var = fluid.global_scope().find_var(param.name) pd_param = pd_var.get_tensor() if len(np.array(pd_param).shape) == 4: prune_names.append(param.name) + if model_type == 'AlexNet': + prune_names.remove('conv5_weights') elif model_type == "MobileNetV1": prune_names.append("conv1_weights") for param in program.global_block().all_parameters(): @@ -162,7 +165,7 @@ def get_prune_params(model): continue prune_names.append(param.name) elif model_type.startswith("MobileNetV3"): - if model_type == 'MobileNetV3_small': + if model_type.startswith('MobileNetV3_small'): expand_prune_id = [3, 4] else: expand_prune_id = [2, 3, 4, 8, 9, 11] diff --git a/paddlex/cv/models/unet.py b/paddlex/cv/models/unet.py index e47dd327383be71aed7b3364a69ee6ebf0b6b924..34c597b0e190122c3ba80c485378273abff20b65 100644 --- a/paddlex/cv/models/unet.py +++ b/paddlex/cv/models/unet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
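Both prune.py hunks above add the same safeguard: if rounding `origin_num * ratio` would prune away every channel of a layer, the ratio is stepped down by 0.1 until at least one channel survives. A standalone sketch of that guard (`clamp_prune_ratio` is a hypothetical name used here for illustration):

```python
def clamp_prune_ratio(origin_num, ratio):
    """Lower ratio until pruning keeps at least one channel of the layer."""
    pruned_num = int(round(origin_num * ratio))
    while origin_num == pruned_num:
        ratio -= 0.1
        pruned_num = int(round(origin_num * ratio))
    return ratio


# For a 4-channel conv with ratio 0.9, round(4 * 0.9) == 4 would remove the
# whole layer, so the guard backs the ratio off to 0.8 (round(4 * 0.8) == 3).
assert abs(clamp_prune_ratio(4, 0.9) - 0.8) < 1e-9
```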
from __future__ import absolute_import import paddlex diff --git a/paddlex/cv/models/utils/detection_eval.py b/paddlex/cv/models/utils/detection_eval.py index b9dcdaa029265483c2b9fb919426686c36a411f5..d2c0ae8abf867baddfc767bd6e1a73cf5d36ea3d 100644 --- a/paddlex/cv/models/utils/detection_eval.py +++ b/paddlex/cv/models/utils/detection_eval.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import diff --git a/paddlex/cv/models/utils/pretrain_weights.py b/paddlex/cv/models/utils/pretrain_weights.py index 03aa9d7be3a5876c25552276ba7029a6c9f0425d..af8a6aa2af452914462bb305e6a03fadc7f2836c 100644 --- a/paddlex/cv/models/utils/pretrain_weights.py +++ b/paddlex/cv/models/utils/pretrain_weights.py @@ -71,6 +71,8 @@ image_pretrain = { 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar', 'HRNet_W64': 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar', + 'AlexNet': + 'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar' } coco_pretrain = { @@ -185,10 +187,12 @@ def get_pretrain_weights(flag, class_name, backbone, save_dir): backbone = 'DetResNet50' assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format( backbone) - # url = image_pretrain[backbone] - # fname = osp.split(url)[-1].split('.')[0] - # paddlex.utils.download_and_decompress(url, path=new_save_dir) - # return osp.join(new_save_dir, fname) + + # if backbone == 'AlexNet': + # url = image_pretrain[backbone] + # fname = osp.split(url)[-1].split('.')[0] + # paddlex.utils.download_and_decompress(url, path=new_save_dir) + # return osp.join(new_save_dir, fname) try: hub.download(backbone, save_path=new_save_dir) except Exception as e: diff --git a/paddlex/cv/models/utils/visualize.py b/paddlex/cv/models/utils/visualize.py index 6982bdf62993598ab8f0d42e09af2e303a7957bb..89875114f13e2b275019e3a65fc19576013dd68a 100644 --- a/paddlex/cv/models/utils/visualize.py +++ b/paddlex/cv/models/utils/visualize.py @@ -1,17 +1,18 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -* import os import cv2 import colorsys diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py index aeef3a1bba21b42394bcdb024f648036b0a8d854..85ee89fc86851ff9be104d0ee258eefce9843a69 100644 --- a/paddlex/cv/models/yolo_v3.py +++ b/paddlex/cv/models/yolo_v3.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -128,8 +128,16 @@ class YOLOv3(BaseAPI): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_steps should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". 
+ format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -278,8 +286,7 @@ class YOLOv3(BaseAPI): eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -299,9 +306,8 @@ class YOLOv3(BaseAPI): data_generator = eval_dataset.generator( batch_size=batch_size, drop_last=False) - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]) diff --git a/paddlex/cv/nets/__init__.py b/paddlex/cv/nets/__init__.py index 6bf7581ee41d1d428670bbbdffbc8919186e5aef..5b427fe31be957f92611f7cfc6a9e6102a3c9616 100644 --- a/paddlex/cv/nets/__init__.py +++ b/paddlex/cv/nets/__init__.py @@ -25,6 +25,7 @@ from .xception import Xception from .densenet import DenseNet from .shufflenet_v2 import ShuffleNetV2 from .hrnet import HRNet +from .alexnet import AlexNet def resnet18(input, num_classes=1000): @@ -154,3 +155,8 @@ def shufflenetv2(input, num_classes=1000): def hrnet_w18(input, num_classes=1000): model = HRNet(width=18, num_classes=num_classes) return model(input) + + +def alexnet(input, num_classes=1000): + model = AlexNet(num_classes=num_classes) + return model(input) diff --git a/paddlex/cv/nets/alexnet.py b/paddlex/cv/nets/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6770f437d982428cd8d5ed7edb44e00915754139 --- /dev/null +++ b/paddlex/cv/nets/alexnet.py @@ -0,0 +1,170 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
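faster_rcnn.py, mask_rcnn.py, and yolo_v3.py all gain the same validation above: linear warmup must finish before the first piecewise learning-rate decay boundary. (Note that the first logged message refers to `lr_decay_epochs[0]*num_samples_in_train_dataset`, while the quantity actually compared against is `lr_decay_epochs[0] * num_steps_each_epoch`.) A sketch of the constraint that raises instead of logging:

```python
def check_warmup_steps(warmup_steps, lr_decay_epochs, num_steps_each_epoch):
    """Warmup must end before the first piecewise-decay boundary."""
    first_boundary = lr_decay_epochs[0] * num_steps_each_epoch
    if warmup_steps > first_boundary:
        raise ValueError(
            "warmup_steps ({}) must not exceed the first decay boundary "
            "({} steps); decrease warmup_steps or increase "
            "lr_decay_epochs[0].".format(warmup_steps, first_boundary))


# With lr_decay_epochs=[8, 11] and 500 steps per epoch, warmup_steps may be
# at most 4000 steps.
check_warmup_steps(1000, [8, 11], 500)  # passes silently
```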
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid + + +class AlexNet(): + def __init__(self, num_classes=1000): + assert num_classes is not None, "In AlextNet, num_classes cannot be None" + self.num_classes = num_classes + + def __call__(self, input): + stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11) + layer_name = [ + "conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8" + ] + conv1 = fluid.layers.conv2d( + input=input, + num_filters=64, + filter_size=11, + stride=4, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_weights")) + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5) + conv2 = fluid.layers.conv2d( + input=pool1, + num_filters=192, + filter_size=5, + stride=1, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_weights")) + pool2 = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3) + conv3 = fluid.layers.conv2d( + input=pool2, + num_filters=384, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_weights")) + + stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3) + conv4 = fluid.layers.conv2d( + input=conv3, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_weights")) + + stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3) + conv5 = fluid.layers.conv2d( + input=conv4, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_weights")) + pool5 = fluid.layers.pool2d( + input=conv5, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] * + drop6.shape[3] * 1.0) + + fc6 = fluid.layers.fc( + input=drop6, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_weights")) + drop7 = fluid.layers.dropout(x=fc6, 
dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0) + + fc7 = fluid.layers.fc( + input=drop7, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_weights")) + + stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0) + out = fluid.layers.fc( + input=fc7, + size=self.num_classes, + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_weights")) + return out diff --git a/paddlex/cv/nets/densenet.py b/paddlex/cv/nets/densenet.py index a7238b2cd8775f20210d04d41f6caa1343c68092..76997c48de412e52cf914c32057f8a1bd0c06f9d 100644 --- a/paddlex/cv/nets/densenet.py +++ b/paddlex/cv/nets/densenet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
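With the backbone above and the classifier and pretrained-weight entries added earlier in this diff, AlexNet becomes usable like the other classification models. Because the network ends in fully connected layers, the input resolution is baked into the fc6 weight shape, which is why the classifier also takes an input shape. A usage sketch, assuming the class is exported as `paddlex.cls.AlexNet` like the existing classifiers, with the conventional 224x224 ImageNet size:

```python
import paddlex as pdx

# AlexNet cannot accept variable-sized inputs: fc6's weight shape depends on
# the spatial size of pool5, so the shape must be fixed at construction time.
model = pdx.cls.AlexNet(num_classes=1000, input_shape=[224, 224])
```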
from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/paddlex/cv/nets/hrnet.py b/paddlex/cv/nets/hrnet.py index 19f9cb336bce66a7dc68d65e316440adf46857e4..a7934d385d4a53fd936410e37d3896fe21cb17ee 100644 --- a/paddlex/cv/nets/hrnet.py +++ b/paddlex/cv/nets/hrnet.py @@ -71,7 +71,7 @@ class HRNet(object): self.end_points = [] return - def net(self, input, class_dim=1000): + def net(self, input): width = self.width channels_2, channels_3, channels_4 = self.channels[width] num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 @@ -125,7 +125,7 @@ class HRNet(object): stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) out = fluid.layers.fc( input=pool, - size=class_dim, + size=self.num_classes, param_attr=ParamAttr( name='fc_weights', initializer=fluid.initializer.Uniform(-stdv, stdv)), diff --git a/paddlex/cv/nets/segmentation/hrnet.py b/paddlex/cv/nets/segmentation/hrnet.py index d55bb8685e7d76c3c276bc04599b1909cde33a01..6c7d8d93692e40047fa4ceb2f4153c18cee06ccd 100644 --- a/paddlex/cv/nets/segmentation/hrnet.py +++ b/paddlex/cv/nets/segmentation/hrnet.py @@ -37,7 +37,8 @@ class HRNet(object): use_bce_loss=False, use_dice_loss=False, class_weight=None, - ignore_index=255): + ignore_index=255, + fixed_input_shape=None): # dice_loss或bce_loss只适用两类分割中 if num_classes > 2 and (use_bce_loss or use_dice_loss): raise ValueError( @@ -65,6 +66,7 @@ class HRNet(object): self.use_dice_loss = use_dice_loss self.class_weight = class_weight self.ignore_index = ignore_index + self.fixed_input_shape = fixed_input_shape self.backbone = paddlex.cv.nets.hrnet.HRNet( width=width, feature_maps="stage4") @@ -130,8 +132,16 @@ class HRNet(object): def generate_inputs(self): inputs = OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', shape=[None, 3, None, None], name='image') + + if self.fixed_input_shape is not None: + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] + inputs['image'] = fluid.data( + dtype='float32', shape=input_shape, name='image') + else: + inputs['image'] = fluid.data( + dtype='float32', shape=[None, 3, None, None], name='image') if self.mode == 'train': inputs['label'] = fluid.data( dtype='int32', shape=[None, 1, None, None], name='label') diff --git a/paddlex/cv/nets/shufflenet_v2.py b/paddlex/cv/nets/shufflenet_v2.py index f663535c31f9c899799b696351ba415049c83bf6..23045ee0d7279011ad93160e778dfd88862b9953 100644 --- a/paddlex/cv/nets/shufflenet_v2.py +++ b/paddlex/cv/nets/shufflenet_v2.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division @@ -269,4 +269,4 @@ class ShuffleNetV2(): name='stage_' + name + '_conv3') out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - return self.channel_shuffle(out, 2) \ No newline at end of file + return self.channel_shuffle(out, 2) diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index 6ec3570c192fd3fa6bc174a9f601b250c3b2b651..0b96d6b4d32f245ec4315851d8edd221776bb6a0 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -727,22 +727,38 @@ class MixupImage(DetTransform): 'Becasuse gt_bbox/gt_class/gt_score is not in label_info!') gt_bbox1 = label_info['gt_bbox'] gt_bbox2 = im_info['mixup'][2]['gt_bbox'] - gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) gt_class1 = label_info['gt_class'] gt_class2 = im_info['mixup'][2]['gt_class'] - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - gt_score1 = label_info['gt_score'] gt_score2 = im_info['mixup'][2]['gt_score'] - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) if 'gt_poly' in label_info: gt_poly1 = label_info['gt_poly'] gt_poly2 = im_info['mixup'][2]['gt_poly'] - label_info['gt_poly'] = gt_poly1 + gt_poly2 is_crowd1 = label_info['is_crowd'] is_crowd2 = im_info['mixup'][2]['is_crowd'] - is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + + if 0 not in gt_class1 and 0 not in gt_class2: + gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) + gt_class = np.concatenate((gt_class1, gt_class2), axis=0) + gt_score = np.concatenate( + (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + gt_poly2 + is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + elif 0 in gt_class1: + gt_bbox = gt_bbox2 + gt_class = gt_class2 + gt_score = gt_score2 * (1. - factor) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly2 + is_crowd = is_crowd2 + else: + gt_bbox = gt_bbox1 + gt_class = gt_class1 + gt_score = gt_score1 * factor + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + is_crowd = is_crowd1 label_info['gt_bbox'] = gt_bbox label_info['gt_score'] = gt_score label_info['gt_class'] = gt_class @@ -814,6 +830,8 @@ class RandomExpand(DetTransform): if np.random.uniform(0., 1.) 
< self.prob: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] height = int(image_shape[0]) width = int(image_shape[1]) @@ -909,6 +927,8 @@ class RandomCrop(DetTransform): if len(label_info['gt_bbox']) == 0: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] w = image_shape[1] @@ -1204,9 +1224,10 @@ class ArrangeYOLOv3(DetTransform): if gt_num > 0: label_info['gt_class'][:gt_num, 0] = label_info[ 'gt_class'][:gt_num, 0] - 1 - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] + if -1 not in label_info['gt_class']: + gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] + gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] + gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] # parse [x1, y1, x2, y2] to [x, y, w, h] gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. @@ -1287,7 +1308,7 @@ class ComposedRCNNTransforms(Compose): super(ComposedRCNNTransforms, self).__init__(transforms) -class ComposedYOLOTransforms(Compose): +class ComposedYOLOv3Transforms(Compose): """YOLOv3模型的图像预处理流程,具体如下, 训练阶段: 1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage @@ -1342,4 +1363,4 @@ class ComposedYOLOTransforms(Compose): target_size=width, interp='CUBIC'), Normalize( mean=mean, std=std) ] - super(ComposedYOLOTransforms, self).__init__(transforms) + super(ComposedYOLOv3Transforms, self).__init__(transforms) diff --git a/paddlex/interpret/as_data_reader/data_path_utils.py b/paddlex/interpret/as_data_reader/data_path_utils.py index e0604f4f5dfc2a622659bb537046a92cd4c2ce61..1c915050bed935c4e7f6ea34be6a231f7c05f44c 100644 --- a/paddlex/interpret/as_data_reader/data_path_utils.py +++ b/paddlex/interpret/as_data_reader/data_path_utils.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
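The det_transforms.py hunks above all key off one convention: a background image added through `add_negative_samples` carries `gt_class == 0`, and MixupImage, RandomExpand, and RandomCrop pass such a sample through unchanged so that no spurious boxes are synthesized for it. A small sketch of the tag check (`is_negative_sample` is a hypothetical helper name):

```python
import numpy as np


def is_negative_sample(label_info):
    # gt_class value 0 marks a background-only (negative) image.
    return 'gt_class' in label_info and 0 in label_info['gt_class']


negative = {'gt_class': np.array([[0]]), 'gt_bbox': np.zeros((1, 4))}
positive = {'gt_class': np.array([[3]]), 'gt_bbox': np.zeros((1, 4))}
assert is_negative_sample(negative)      # RandomExpand/RandomCrop skip it
assert not is_negative_sample(positive)  # normal augmentation applies
```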
import os @@ -19,4 +19,4 @@ def _find_classes(dir): classes = [d.name for d in os.scandir(dir) if d.is_dir()] classes.sort() class_to_idx = {classes[i]: i for i in range(len(classes))} - return classes, class_to_idx \ No newline at end of file + return classes, class_to_idx diff --git a/paddlex/interpret/as_data_reader/readers.py b/paddlex/interpret/as_data_reader/readers.py index d9244f17fa0a93f26589b29471ade59843b7d454..4b551177334c1da6546a605f2cee00518d90c57a 100644 --- a/paddlex/interpret/as_data_reader/readers.py +++ b/paddlex/interpret/as_data_reader/readers.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys @@ -223,4 +223,4 @@ def create_reader(list_image_path, list_label=None, is_test=False): yield img_show, img, label - return reader \ No newline at end of file + return reader diff --git a/paddlex/interpret/core/_session_preparation.py b/paddlex/interpret/core/_session_preparation.py index f75fa2464fe43969ec76c557c43344c0f2ae877f..81d3b22b216f07047b6a3a4c39701a03ec96a964 100644 --- a/paddlex/interpret/core/_session_preparation.py +++ b/paddlex/interpret/core/_session_preparation.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. import os import os.path as osp @@ -20,6 +20,7 @@ import numpy as np from paddle.fluid.param_attr import ParamAttr from paddlex.interpret.as_data_reader.readers import preprocess_image + def gen_user_home(): if "HOME" in os.environ: home_path = os.environ["HOME"] @@ -34,10 +35,20 @@ def paddle_get_fc_weights(var_name="fc_0.w_0"): def paddle_resize(extracted_features, outsize): - resized_features = fluid.layers.resize_bilinear(extracted_features, outsize) + resized_features = fluid.layers.resize_bilinear(extracted_features, + outsize) return resized_features +def get_precomputed_normlime_weights(): + root_path = gen_user_home() + root_path = osp.join(root_path, '.paddlex') + h_pre_models = osp.join(root_path, "pre_models") + normlime_weights_file = osp.join( + h_pre_models, "normlime_weights_imagenet_resnet50vc.npy") + return np.load(normlime_weights_file, allow_pickle=True).item() + + def compute_features_for_kmeans(data_content): root_path = gen_user_home() root_path = osp.join(root_path, '.paddlex') @@ -47,6 +58,7 @@ def compute_features_for_kmeans(data_content): os.makedirs(root_path) url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" pdx.utils.download_and_decompress(url, path=root_path) + def conv_bn_layer(input, num_filters, filter_size, @@ -55,7 +67,7 @@ def compute_features_for_kmeans(data_content): act=None, name=None, is_test=True, - global_name=''): + global_name='for_kmeans_'): conv = fluid.layers.conv2d( input=input, num_filters=num_filters, @@ -79,14 +91,14 @@ def compute_features_for_kmeans(data_content): bias_attr=ParamAttr(global_name + bn_name + '_offset'), moving_mean_name=global_name + bn_name + '_mean', moving_variance_name=global_name + bn_name + '_variance', - use_global_stats=is_test - ) + use_global_stats=is_test) startup_prog = fluid.default_startup_program().clone(for_test=True) prog = fluid.Program() with fluid.program_guard(prog, startup_prog): with fluid.unique_name.guard(): - image_op = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32') + image_op = fluid.data( + name='image', shape=[None, 3, 224, 224], dtype='float32') conv = conv_bn_layer( input=image_op, @@ -110,7 +122,8 @@ def compute_features_for_kmeans(data_content): act='relu', name='conv1_3') extracted_features = conv - resized_features = fluid.layers.resize_bilinear(extracted_features, image_op.shape[2:]) + resized_features = fluid.layers.resize_bilinear(extracted_features, + image_op.shape[2:]) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) @@ -119,7 +132,10 @@ def compute_features_for_kmeans(data_content): exe.run(startup_prog) fluid.io.load_persistables(exe, h_pre_models, prog) - images = preprocess_image(data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0] - result = exe.run(prog, fetch_list=[resized_features], feed={'image': images}) + images = preprocess_image( + data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0] + result = exe.run(prog, + fetch_list=[resized_features], + feed={'image': images}) return result[0][0] diff --git a/paddlex/interpret/core/interpretation.py b/paddlex/interpret/core/interpretation.py index 72d8c238a2e1817098eefcae18b0a3b56aedeb6b..ca3b1cf3371f244a1ab55e6940de2cd382fd7ab3 100644 --- a/paddlex/interpret/core/interpretation.py +++ b/paddlex/interpret/core/interpretation.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from .interpretation_algorithms import CAM, LIME, NormLIME from .normlime_base import precompute_normlime_weights @@ -20,12 +20,10 @@ class Interpretation(object): """ Base class for all interpretation algorithms. """ - def __init__(self, interpretation_algorithm_name, predict_fn, label_names, **kwargs): - supported_algorithms = { - 'cam': CAM, - 'lime': LIME, - 'normlime': NormLIME - } + + def __init__(self, interpretation_algorithm_name, predict_fn, label_names, + **kwargs): + supported_algorithms = {'cam': CAM, 'lime': LIME, 'normlime': NormLIME} self.algorithm_name = interpretation_algorithm_name.lower() assert self.algorithm_name in supported_algorithms.keys() @@ -33,19 +31,17 @@ class Interpretation(object): # initialization for the interpretation algorithm. self.algorithm = supported_algorithms[self.algorithm_name]( - self.predict_fn, label_names, **kwargs - ) + self.predict_fn, label_names, **kwargs) - def interpret(self, data_, visualization=True, save_to_disk=True, save_dir='./tmp'): + def interpret(self, data_, visualization=True, save_dir='./'): """ Args: data_: data_ can be a path or numpy.ndarray. visualization: whether to show using matplotlib. - save_to_disk: whether to save the figure in local disk. save_dir: dir to save figure if save_to_disk is True. Returns: """ - return self.algorithm.interpret(data_, visualization, save_to_disk, save_dir) + return self.algorithm.interpret(data_, visualization, save_dir) diff --git a/paddlex/interpret/core/interpretation_algorithms.py b/paddlex/interpret/core/interpretation_algorithms.py index afcea8d2d92531590a1aef986014c5bfd792ea5e..2805af601a91314a5d554511af04b53eef7b653a 100644 --- a/paddlex/interpret/core/interpretation_algorithms.py +++ b/paddlex/interpret/core/interpretation_algorithms.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import os.path as osp @@ -23,7 +23,6 @@ from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, lo from paddlex.interpret.as_data_reader.readers import read_image import paddlex.utils.logging as logging - import cv2 @@ -66,25 +65,27 @@ class CAM(object): fc_weights = paddle_get_fc_weights() feature_maps = result[1] - + l = pred_label[0] ln = l if self.label_names is not None: ln = self.label_names[l] prob_str = "%.3f" % (probability[pred_label[0]]) - logging.info("predicted result: {} with probability {}.".format(ln, prob_str)) + logging.info("predicted result: {} with probability {}.".format( + ln, prob_str)) return feature_maps, fc_weights - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): feature_maps, fc_weights = self.preparation_cam(data_) - cam = get_cam(self.image, feature_maps, fc_weights, self.predicted_label) + cam = get_cam(self.image, feature_maps, fc_weights, + self.predicted_label) - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] - ln = l + ln = l if self.label_names is not None: ln = self.label_names[l] @@ -93,7 +94,8 @@ class CAM(object): ncols = 2 plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") axes = axes.ravel() @@ -104,8 +106,7 @@ class CAM(object): axes[1].imshow(cam) axes[1].set_title("CAM") - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'cam') if visualization: @@ -115,7 +116,11 @@ class CAM(object): class LIME(object): - def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50): """ LIME wrapper. See lime_base.py for the detailed LIME implementation. 
Args: @@ -154,31 +159,37 @@ class LIME(object): self.predicted_probability = probability[pred_label[0]] self.image = image_show[0] self.labels = pred_label - + l = pred_label[0] ln = l if self.label_names is not None: ln = self.label_names[l] - + prob_str = "%.3f" % (probability[pred_label[0]]) - logging.info("predicted result: {} with probability {}.".format(ln, prob_str)) + logging.info("predicted result: {} with probability {}.".format( + ln, prob_str)) end = time.time() algo = lime_base.LimeImageInterpreter() - interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0, - num_samples=self.num_samples, batch_size=self.batch_size) + interpreter = algo.interpret_instance( + self.image, + self.predict_fn, + self.labels, + 0, + num_samples=self.num_samples, + batch_size=self.batch_size) self.lime_interpreter = interpreter logging.info('lime time: ' + str(time.time() - end) + 's.') - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): if self.lime_interpreter is None: self.preparation_lime(data_) - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] - ln = l + ln = l if self.label_names is not None: ln = self.label_names[l] @@ -188,7 +199,8 @@ class LIME(object): ncols = len(weights_choices) plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") axes = axes.ravel() @@ -196,20 +208,24 @@ class LIME(object): prob_str = "{%.3f}" % (self.predicted_probability) axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) - axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments)) + axes[1].imshow( + mark_boundaries(self.image, self.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") # LIME visualization for i, w in enumerate(weights_choices): - num_to_show = auto_choose_num_features_to_show(self.lime_interpreter, l, w) + num_to_show = auto_choose_num_features_to_show( + self.lime_interpreter, l, w) temp, mask = self.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title("label {}, first {} superpixels".format(ln, num_to_show)) + axes[ncols + i].set_title( + "label {}, first {} superpixels".format(ln, num_to_show)) - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'lime', self.num_samples) if visualization: @@ -218,9 +234,196 @@ class LIME(object): return +class NormLIMEStandard(object): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50, + kmeans_model_for_normlime=None, + normlime_weights=None): + root_path = gen_user_home() + root_path = osp.join(root_path, '.paddlex') + h_pre_models = osp.join(root_path, "pre_models") + if not osp.exists(h_pre_models): + if not osp.exists(root_path): + os.makedirs(root_path) + url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" + pdx.utils.download_and_decompress(url, path=root_path) + h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl") + if 
kmeans_model_for_normlime is None: + try: + self.kmeans_model = load_kmeans_model(h_pre_models_kmeans) + except: + raise ValueError( + "NormLIME needs the KMeans model, where we provided a default one in " + "pre_models/kmeans_model.pkl.") + else: + logging.debug("Warning: It is *strongly* suggested to use the \ + default KMeans model in pre_models/kmeans_model.pkl. \ + Use another one will change the final result.") + self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime) + + self.num_samples = num_samples + self.batch_size = batch_size + + try: + self.normlime_weights = np.load( + normlime_weights, allow_pickle=True).item() + except: + self.normlime_weights = None + logging.debug( + "Warning: not find the correct precomputed Normlime result.") + + self.predict_fn = predict_fn + + self.labels = None + self.image = None + self.label_names = label_names + + def predict_cluster_labels(self, feature_map, segments): + X = get_feature_for_kmeans(feature_map, segments) + try: + cluster_labels = self.kmeans_model.predict(X) + except AttributeError: + from sklearn.metrics import pairwise_distances_argmin_min + cluster_labels, _ = pairwise_distances_argmin_min( + X, self.kmeans_model.cluster_centers_) + return cluster_labels + + def predict_using_normlime_weights(self, pred_labels, + predicted_cluster_labels): + # global weights + g_weights = {y: [] for y in pred_labels} + for y in pred_labels: + cluster_weights_y = self.normlime_weights.get(y, {}) + g_weights[y] = [(i, cluster_weights_y.get(k, 0.0)) + for i, k in enumerate(predicted_cluster_labels)] + + g_weights[y] = sorted( + g_weights[y], key=lambda x: np.abs(x[1]), reverse=True) + + return g_weights + + def preparation_normlime(self, data_): + self._lime = LIME(self.predict_fn, self.label_names, self.num_samples, + self.batch_size) + self._lime.preparation_lime(data_) + + image_show = read_image(data_) + + self.predicted_label = self._lime.predicted_label + self.predicted_probability = self._lime.predicted_probability + self.image = image_show[0] + self.labels = self._lime.labels + logging.info('performing NormLIME operations ...') + + cluster_labels = self.predict_cluster_labels( + compute_features_for_kmeans(image_show).transpose((1, 2, 0)), + self._lime.lime_interpreter.segments) + + g_weights = self.predict_using_normlime_weights(self.labels, + cluster_labels) + + return g_weights + + def interpret(self, data_, visualization=True, save_outdir=None): + if self.normlime_weights is None: + raise ValueError( + "Not find the correct precomputed NormLIME result. \n" + "\t Try to call compute_normlime_weights() first or load the correct path." 
+ ) + + g_weights = self.preparation_normlime(data_) + lime_weights = self._lime.lime_interpreter.local_weights + + if visualization or save_outdir is not None: + import matplotlib.pyplot as plt + from skimage.segmentation import mark_boundaries + l = self.labels[0] + ln = l + if self.label_names is not None: + ln = self.label_names[l] + + psize = 5 + nrows = 4 + weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85] + nums_to_show = [] + ncols = len(weights_choices) + + plt.close() + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) + for ax in axes.ravel(): + ax.axis("off") + + axes = axes.ravel() + axes[0].imshow(self.image) + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) + + axes[1].imshow( + mark_boundaries(self.image, + self._lime.lime_interpreter.segments)) + axes[1].set_title("superpixel segmentation") + + # LIME visualization + for i, w in enumerate(weights_choices): + num_to_show = auto_choose_num_features_to_show( + self._lime.lime_interpreter, l, w) + nums_to_show.append(num_to_show) + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols + i].imshow(mark_boundaries(temp, mask)) + axes[ncols + i].set_title("LIME: first {} superpixels".format( + num_to_show)) + + # NormLIME visualization + self._lime.lime_interpreter.local_weights = g_weights + for i, num_to_show in enumerate(nums_to_show): + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask)) + axes[ncols * 2 + i].set_title( + "NormLIME: first {} superpixels".format(num_to_show)) + + # NormLIME*LIME visualization + combined_weights = combine_normlime_and_lime(lime_weights, + g_weights) + self._lime.lime_interpreter.local_weights = combined_weights + for i, num_to_show in enumerate(nums_to_show): + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask)) + axes[ncols * 3 + i].set_title( + "Combined: first {} superpixels".format(num_to_show)) + + self._lime.lime_interpreter.local_weights = lime_weights + + if save_outdir is not None: + save_fig(data_, save_outdir, 'normlime', self.num_samples) + + if visualization: + plt.show() + + class NormLIME(object): - def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50, - kmeans_model_for_normlime=None, normlime_weights=None): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50, + kmeans_model_for_normlime=None, + normlime_weights=None): root_path = gen_user_home() root_path = osp.join(root_path, '.paddlex') h_pre_models = osp.join(root_path, "pre_models") @@ -234,8 +437,9 @@ class NormLIME(object): try: self.kmeans_model = load_kmeans_model(h_pre_models_kmeans) except: - raise ValueError("NormLIME needs the KMeans model, where we provided a default one in " - "pre_models/kmeans_model.pkl.") + raise ValueError( + "NormLIME needs the KMeans model, where we provided a default one in " + "pre_models/kmeans_model.pkl.") else: logging.debug("Warning: It is *strongly* suggested to use the \ default KMeans model in pre_models/kmeans_model.pkl. 
\ @@ -246,10 +450,12 @@ class NormLIME(object): self.batch_size = batch_size try: - self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item() + self.normlime_weights = np.load( + normlime_weights, allow_pickle=True).item() except: self.normlime_weights = None - logging.debug("Warning: not find the correct precomputed Normlime result.") + logging.debug( + "Warning: not find the correct precomputed Normlime result.") self.predict_fn = predict_fn @@ -263,30 +469,27 @@ class NormLIME(object): cluster_labels = self.kmeans_model.predict(X) except AttributeError: from sklearn.metrics import pairwise_distances_argmin_min - cluster_labels, _ = pairwise_distances_argmin_min(X, self.kmeans_model.cluster_centers_) + cluster_labels, _ = pairwise_distances_argmin_min( + X, self.kmeans_model.cluster_centers_) return cluster_labels - def predict_using_normlime_weights(self, pred_labels, predicted_cluster_labels): + def predict_using_normlime_weights(self, pred_labels, + predicted_cluster_labels): # global weights g_weights = {y: [] for y in pred_labels} for y in pred_labels: cluster_weights_y = self.normlime_weights.get(y, {}) - g_weights[y] = [ - (i, cluster_weights_y.get(k, 0.0)) for i, k in enumerate(predicted_cluster_labels) - ] + g_weights[y] = [(i, cluster_weights_y.get(k, 0.0)) + for i, k in enumerate(predicted_cluster_labels)] - g_weights[y] = sorted(g_weights[y], - key=lambda x: np.abs(x[1]), reverse=True) + g_weights[y] = sorted( + g_weights[y], key=lambda x: np.abs(x[1]), reverse=True) return g_weights def preparation_normlime(self, data_): - self._lime = LIME( - self.predict_fn, - self.label_names, - self.num_samples, - self.batch_size - ) + self._lime = LIME(self.predict_fn, self.label_names, self.num_samples, + self.batch_size) self._lime.preparation_lime(data_) image_show = read_image(data_) @@ -298,22 +501,25 @@ class NormLIME(object): logging.info('performing NormLIME operations ...') cluster_labels = self.predict_cluster_labels( - compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments - ) + compute_features_for_kmeans(image_show).transpose((1, 2, 0)), + self._lime.lime_interpreter.segments) - g_weights = self.predict_using_normlime_weights(self.labels, cluster_labels) + g_weights = self.predict_using_normlime_weights(self.labels, + cluster_labels) return g_weights - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): if self.normlime_weights is None: - raise ValueError("Not find the correct precomputed NormLIME result. \n" - "\t Try to call compute_normlime_weights() first or load the correct path.") + raise ValueError( + "Not find the correct precomputed NormLIME result. \n" + "\t Try to call compute_normlime_weights() first or load the correct path." 
+ ) g_weights = self.preparation_normlime(data_) lime_weights = self._lime.lime_interpreter.local_weights - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] @@ -328,7 +534,8 @@ class NormLIME(object): ncols = len(weights_choices) plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") @@ -337,64 +544,83 @@ class NormLIME(object): prob_str = "{%.3f}" % (self.predicted_probability) axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) - axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments)) + axes[1].imshow( + mark_boundaries(self.image, + self._lime.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") # LIME visualization for i, w in enumerate(weights_choices): - num_to_show = auto_choose_num_features_to_show(self._lime.lime_interpreter, l, w) + num_to_show = auto_choose_num_features_to_show( + self._lime.lime_interpreter, l, w) nums_to_show.append(num_to_show) temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title("LIME: first {} superpixels".format(num_to_show)) + axes[ncols + i].set_title("LIME: first {} superpixels".format( + num_to_show)) # NormLIME visualization self._lime.lime_interpreter.local_weights = g_weights for i, num_to_show in enumerate(nums_to_show): temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 2 + i].set_title("NormLIME: first {} superpixels".format(num_to_show)) + axes[ncols * 2 + i].set_title( + "NormLIME: first {} superpixels".format(num_to_show)) # NormLIME*LIME visualization - combined_weights = combine_normlime_and_lime(lime_weights, g_weights) + combined_weights = combine_normlime_and_lime(lime_weights, + g_weights) + self._lime.lime_interpreter.local_weights = combined_weights for i, num_to_show in enumerate(nums_to_show): temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 3 + i].set_title("Combined: first {} superpixels".format(num_to_show)) + axes[ncols * 3 + i].set_title( + "Combined: first {} superpixels".format(num_to_show)) self._lime.lime_interpreter.local_weights = lime_weights - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'normlime', self.num_samples) if visualization: plt.show() -def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show): +def auto_choose_num_features_to_show(lime_interpreter, label, + percentage_to_show): segments = lime_interpreter.segments lime_weights = lime_interpreter.local_weights[label] - num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[1] // len(np.unique(segments)) // 8 + 
num_pixels_threshold_in_a_sp = segments.shape[
+        1] // len(np.unique(segments)) // 8

     # l1 norm with filtered weights.
-    used_weights = [(tuple_w[0], tuple_w[1]) for i, tuple_w in enumerate(lime_weights) if tuple_w[1] > 0]
+    used_weights = [(tuple_w[0], tuple_w[1])
+                    for i, tuple_w in enumerate(lime_weights)
+                    if tuple_w[1] > 0]
     norm = np.sum([tuple_w[1] for i, tuple_w in enumerate(used_weights)])
-    normalized_weights = [(tuple_w[0], tuple_w[1] / norm) for i, tuple_w in enumerate(lime_weights)]
+    normalized_weights = [(tuple_w[0], tuple_w[1] / norm)
+                          for i, tuple_w in enumerate(lime_weights)]

     a = 0.0
     n = 0
     for i, tuple_w in enumerate(normalized_weights):
         if tuple_w[1] < 0:
             continue
-        if len(np.where(segments == tuple_w[0])[0]) < num_pixels_threshold_in_a_sp:
+        if len(np.where(segments == tuple_w[0])[
+                0]) < num_pixels_threshold_in_a_sp:
             continue

         a += tuple_w[1]
@@ -406,12 +632,18 @@ def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show
         return 5

     if n == 0:
-        return auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show-0.1)
+        return auto_choose_num_features_to_show(lime_interpreter, label,
+                                                percentage_to_show - 0.1)

     return n


-def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam_max=None):
+def get_cam(image_show,
+            feature_maps,
+            fc_weights,
+            label_index,
+            cam_min=None,
+            cam_max=None):
     _, nc, h, w = feature_maps.shape

     cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
@@ -425,7 +657,8 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
     cam = cam - cam_min
     cam = cam / cam_max
     cam = np.uint8(255 * cam)
-    cam_img = cv2.resize(cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
+    cam_img = cv2.resize(
+        cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)

     heatmap = cv2.applyColorMap(np.uint8(255 * cam_img), cv2.COLORMAP_JET)
     heatmap = np.float32(heatmap)
@@ -437,34 +670,11 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam

 def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
     import matplotlib.pyplot as plt
-    if isinstance(data_, str):
-        if algorithm_name == 'cam':
-            f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
-        else:
-            f_out = "{}_{}_s{}.png".format(algorithm_name, data_.split('/')[-1], num_samples)
-        plt.savefig(
-            os.path.join(save_outdir, f_out)
-        )
+    if algorithm_name == 'cam':
+        f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
     else:
-        n = 0
-        if algorithm_name == 'cam':
-            f_out = 'cam-{}.png'.format(n)
-        else:
-            f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
-        while os.path.exists(
-                os.path.join(save_outdir, f_out)
-        ):
-            n += 1
-            if algorithm_name == 'cam':
-                f_out = 'cam-{}.png'.format(n)
-            else:
-                f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n)
-            continue
-        plt.savefig(
-            os.path.join(
-                save_outdir, f_out
-            )
-        )
-    logging.info('The image of intrepretation result save in {}'.format(os.path.join(
-        save_outdir, f_out
-    )))
+        # save_outdir arrives as a path prefix (save_dir/image_name), so it
+        # is formatted straight into the output file name.
+        f_out = "{}_{}_s{}.png".format(save_outdir, algorithm_name,
+                                       num_samples)
+
+    plt.savefig(f_out)
+    logging.info(
+        'The image of interpretation result saved in {}'.format(f_out))
diff --git a/paddlex/interpret/core/lime_base.py b/paddlex/interpret/core/lime_base.py
index 3d3bd96d0e7b5ffb0de2d2f8156a03021cfad312..d7b44016ae41656c41db25572133e5a6cfc57675 100644
--- a/paddlex/interpret/core/lime_base.py
+++ b/paddlex/interpret/core/lime_base.py
@@ -27,7 +27,6 @@ OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime. """ - import numpy as np import scipy as sp @@ -39,10 +38,8 @@ import paddlex.utils.logging as logging class LimeBase(object): """Class for learning a locally linear sparse model from perturbed data""" - def __init__(self, - kernel_fn, - verbose=False, - random_state=None): + + def __init__(self, kernel_fn, verbose=False, random_state=None): """Init function Args: @@ -72,15 +69,14 @@ class LimeBase(object): """ from sklearn.linear_model import lars_path x_vector = weighted_data - alphas, _, coefs = lars_path(x_vector, - weighted_labels, - method='lasso', - verbose=False) + alphas, _, coefs = lars_path( + x_vector, weighted_labels, method='lasso', verbose=False) return alphas, coefs def forward_selection(self, data, labels, weights, num_features): """Iteratively adds features to the model""" - clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state) + clf = Ridge( + alpha=0, fit_intercept=True, random_state=self.random_state) used_features = [] for _ in range(min(num_features, data.shape[1])): max_ = -100000000 @@ -88,11 +84,13 @@ class LimeBase(object): for feature in range(data.shape[1]): if feature in used_features: continue - clf.fit(data[:, used_features + [feature]], labels, + clf.fit(data[:, used_features + [feature]], + labels, sample_weight=weights) - score = clf.score(data[:, used_features + [feature]], - labels, - sample_weight=weights) + score = clf.score( + data[:, used_features + [feature]], + labels, + sample_weight=weights) if score > max_: best = feature max_ = score @@ -108,8 +106,8 @@ class LimeBase(object): elif method == 'forward_selection': return self.forward_selection(data, labels, weights, num_features) elif method == 'highest_weights': - clf = Ridge(alpha=0.01, fit_intercept=True, - random_state=self.random_state) + clf = Ridge( + alpha=0.01, fit_intercept=True, random_state=self.random_state) clf.fit(data, labels, sample_weight=weights) coef = clf.coef_ @@ -125,7 +123,8 @@ class LimeBase(object): nnz_indexes = argsort_data[::-1] indices = weighted_data.indices[nnz_indexes] num_to_pad = num_features - sdata - indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype))) + indices = np.concatenate((indices, np.zeros( + num_to_pad, dtype=indices.dtype))) indices_set = set(indices) pad_counter = 0 for i in range(data.shape[1]): @@ -135,7 +134,8 @@ class LimeBase(object): if pad_counter >= num_to_pad: break else: - nnz_indexes = argsort_data[sdata - num_features:sdata][::-1] + nnz_indexes = argsort_data[sdata - num_features:sdata][:: + -1] indices = weighted_data.indices[nnz_indexes] return indices else: @@ -146,13 +146,13 @@ class LimeBase(object): reverse=True) return np.array([x[0] for x in feature_weights[:num_features]]) elif method == 'lasso_path': - weighted_data = ((data - np.average(data, axis=0, weights=weights)) - * np.sqrt(weights[:, np.newaxis])) - weighted_labels = ((labels - np.average(labels, weights=weights)) - * np.sqrt(weights)) + weighted_data = ((data - np.average( + data, axis=0, weights=weights)) * + np.sqrt(weights[:, np.newaxis])) + weighted_labels = ((labels - np.average( + labels, weights=weights)) * np.sqrt(weights)) nonzero = range(weighted_data.shape[1]) - _, coefs = self.generate_lars_path(weighted_data, - weighted_labels) + _, coefs = self.generate_lars_path(weighted_data, weighted_labels) for i in range(len(coefs.T) - 1, 0, -1): nonzero = coefs.T[i].nonzero()[0] if 
len(nonzero) <= num_features: @@ -164,8 +164,8 @@ class LimeBase(object): n_method = 'forward_selection' else: n_method = 'highest_weights' - return self.feature_selection(data, labels, weights, - num_features, n_method) + return self.feature_selection(data, labels, weights, num_features, + n_method) def interpret_instance_with_data(self, neighborhood_data, @@ -214,30 +214,31 @@ class LimeBase(object): weights = self.kernel_fn(distances) labels_column = neighborhood_labels[:, label] used_features = self.feature_selection(neighborhood_data, - labels_column, - weights, - num_features, - feature_selection) + labels_column, weights, + num_features, feature_selection) if model_regressor is None: - model_regressor = Ridge(alpha=1, fit_intercept=True, - random_state=self.random_state) + model_regressor = Ridge( + alpha=1, fit_intercept=True, random_state=self.random_state) easy_model = model_regressor easy_model.fit(neighborhood_data[:, used_features], - labels_column, sample_weight=weights) + labels_column, + sample_weight=weights) prediction_score = easy_model.score( neighborhood_data[:, used_features], - labels_column, sample_weight=weights) + labels_column, + sample_weight=weights) - local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1)) + local_pred = easy_model.predict(neighborhood_data[0, used_features] + .reshape(1, -1)) if self.verbose: logging.info('Intercept' + str(easy_model.intercept_)) logging.info('Prediction_local' + str(local_pred)) logging.info('Right:' + str(neighborhood_labels[0, label])) - return (easy_model.intercept_, - sorted(zip(used_features, easy_model.coef_), - key=lambda x: np.abs(x[1]), reverse=True), - prediction_score, local_pred) + return (easy_model.intercept_, sorted( + zip(used_features, easy_model.coef_), + key=lambda x: np.abs(x[1]), + reverse=True), prediction_score, local_pred) class ImageInterpretation(object): @@ -254,8 +255,13 @@ class ImageInterpretation(object): self.local_weights = {} self.local_pred = None - def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False, - num_features=5, min_weight=0.): + def get_image_and_mask(self, + label, + positive_only=True, + negative_only=False, + hide_rest=False, + num_features=5, + min_weight=0.): """Init function. Args: @@ -279,7 +285,9 @@ class ImageInterpretation(object): if label not in self.local_weights: raise KeyError('Label not in interpretation') if positive_only & negative_only: - raise ValueError("Positive_only and negative_only cannot be true at the same time.") + raise ValueError( + "Positive_only and negative_only cannot be true at the same time." 
+ ) segments = self.segments image = self.image local_weights_label = self.local_weights[label] @@ -289,14 +297,20 @@ class ImageInterpretation(object): else: temp = self.image.copy() if positive_only: - fs = [x[0] for x in local_weights_label - if x[1] > 0 and x[1] > min_weight][:num_features] + fs = [ + x[0] for x in local_weights_label + if x[1] > 0 and x[1] > min_weight + ][:num_features] if negative_only: - fs = [x[0] for x in local_weights_label - if x[1] < 0 and abs(x[1]) > min_weight][:num_features] + fs = [ + x[0] for x in local_weights_label + if x[1] < 0 and abs(x[1]) > min_weight + ][:num_features] if positive_only or negative_only: + c = 1 if positive_only else 0 for f in fs: - temp[segments == f] = image[segments == f].copy() + temp[segments == f] = [0, 255, 0] + # temp[segments == f, c] = np.max(image) mask[segments == f] = 1 return temp, mask else: @@ -330,8 +344,11 @@ class ImageInterpretation(object): temp = np.zeros_like(image) weight_max = abs(local_weights_label[0][1]) - local_weights_label = [(f, w/weight_max) for f, w in local_weights_label] - local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True) # negatives are at last. + local_weights_label = [(f, w / weight_max) + for f, w in local_weights_label] + local_weights_label = sorted( + local_weights_label, key=lambda x: x[1], + reverse=True) # negatives are at last. cmaps = cm.get_cmap('Spectral') colors = cmaps(np.linspace(0, 1, len(local_weights_label))) @@ -354,8 +371,12 @@ class LimeImageInterpreter(object): feature that is 1 when the value is the same as the instance being interpreted.""" - def __init__(self, kernel_width=.25, kernel=None, verbose=False, - feature_selection='auto', random_state=None): + def __init__(self, + kernel_width=.25, + kernel=None, + verbose=False, + feature_selection='auto', + random_state=None): """Init function. Args: @@ -377,22 +398,27 @@ class LimeImageInterpreter(object): kernel_width = float(kernel_width) if kernel is None: + def kernel(d, kernel_width): - return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2)) + return np.sqrt(np.exp(-(d**2) / kernel_width**2)) kernel_fn = partial(kernel, kernel_width=kernel_width) self.random_state = check_random_state(random_state) self.feature_selection = feature_selection - self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state) + self.base = LimeBase( + kernel_fn, verbose, random_state=self.random_state) - def interpret_instance(self, image, classifier_fn, labels=(1,), + def interpret_instance(self, + image, + classifier_fn, + labels=(1, ), hide_color=None, - num_features=100000, num_samples=1000, + num_features=100000, + num_samples=1000, batch_size=10, distance_metric='cosine', - model_regressor=None - ): + model_regressor=None): """Generates interpretations for a prediction. 
First, we generate neighborhood data by randomly perturbing features @@ -435,6 +461,7 @@ class LimeImageInterpreter(object): self.segments = segments fudged_image = image.copy() + # global_mean = np.mean(image, (0, 1)) if hide_color is None: # if no hide_color, use the mean for x in np.unique(segments): @@ -461,24 +488,30 @@ class LimeImageInterpreter(object): top = labels - data, labels = self.data_labels(image, fudged_image, segments, - classifier_fn, num_samples, - batch_size=batch_size) + data, labels = self.data_labels( + image, + fudged_image, + segments, + classifier_fn, + num_samples, + batch_size=batch_size) distances = sklearn.metrics.pairwise_distances( - data, - data[0].reshape(1, -1), - metric=distance_metric - ).ravel() + data, data[0].reshape(1, -1), metric=distance_metric).ravel() interpretation_image = ImageInterpretation(image, segments) for label in top: (interpretation_image.intercept[label], interpretation_image.local_weights[label], - interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data( - data, labels, distances, label, num_features, - model_regressor=model_regressor, - feature_selection=self.feature_selection) + interpretation_image.score, interpretation_image.local_pred + ) = self.base.interpret_instance_with_data( + data, + labels, + distances, + label, + num_features, + model_regressor=model_regressor, + feature_selection=self.feature_selection) return interpretation_image def data_labels(self, @@ -511,6 +544,9 @@ class LimeImageInterpreter(object): labels = [] data[0, :] = 1 imgs = [] + + logging.info("Computing LIME.", use_color=True) + for row in tqdm.tqdm(data): temp = copy.deepcopy(image) zeros = np.where(row == 0)[0] diff --git a/paddlex/interpret/core/normlime_base.py b/paddlex/interpret/core/normlime_base.py index 3b3a94212ded51d1300b9ae78f4cdab0e1589903..8270099b17c858688903354bffcfa412ed8c804c 100644 --- a/paddlex/interpret/core/normlime_base.py +++ b/paddlex/interpret/core/normlime_base.py @@ -1,21 +1,22 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
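The `interpret_instance`/`data_labels` pair above is the heart of LIME: superpixels are randomly switched on or off, the classifier is queried on each perturbed image, and a distance-weighted Ridge surrogate is fit on the on/off masks. A toy, self-contained sketch of that loop (illustrative only, not the library code: it hides superpixels with zeros rather than the fudged mean image, and uses a Euclidean kernel where the library defaults to cosine distance with kernel width 0.25):

```python
import numpy as np
from sklearn.linear_model import Ridge


def toy_lime(image, segments, predict_fn, label, num_samples=1000):
    """Toy perturb-and-fit loop: switch superpixels on/off, query the
    model, then fit a distance-weighted linear surrogate."""
    n_seg = len(np.unique(segments))
    data = np.random.randint(0, 2, (num_samples, n_seg))
    data[0, :] = 1  # row 0 keeps every superpixel (the original image)
    probs = []
    for row in data:
        temp = image.copy()
        for seg in np.where(row == 0)[0]:
            temp[segments == seg] = 0  # hide switched-off superpixels
        probs.append(predict_fn(temp[np.newaxis, ...])[0, label])
    # nearby perturbations get larger sample weights (Euclidean here
    # for brevity; the library uses cosine distances)
    distances = np.linalg.norm(data - data[0], axis=1)
    weights = np.sqrt(np.exp(-(distances**2) / 0.25**2))
    surrogate = Ridge(alpha=1).fit(data, probs, sample_weight=weights)
    # per-superpixel weights, largest magnitude first
    return sorted(enumerate(surrogate.coef_), key=lambda t: -abs(t[1]))
```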
import os import os.path as osp import numpy as np import glob +import tqdm from paddlex.interpret.as_data_reader.readers import read_image import paddlex.utils.logging as logging @@ -38,18 +39,24 @@ def combine_normlime_and_lime(lime_weights, g_weights): for y in pred_labels: normlized_lime_weights_y = lime_weights[y] - lime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_lime_weights_y} + lime_weights_dict = { + tuple_w[0]: tuple_w[1] + for tuple_w in normlized_lime_weights_y + } normlized_g_weight_y = g_weights[y] - normlime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_g_weight_y} + normlime_weights_dict = { + tuple_w[0]: tuple_w[1] + for tuple_w in normlized_g_weight_y + } combined_weights[y] = [ (seg_k, lime_weights_dict[seg_k] * normlime_weights_dict[seg_k]) for seg_k in lime_weights_dict.keys() ] - combined_weights[y] = sorted(combined_weights[y], - key=lambda x: np.abs(x[1]), reverse=True) + combined_weights[y] = sorted( + combined_weights[y], key=lambda x: np.abs(x[1]), reverse=True) return combined_weights @@ -67,7 +74,8 @@ def centroid_using_superpixels(features, segments): regions = regionprops(segments + 1) one_list = np.zeros((len(np.unique(segments)), features.shape[2])) for i, r in enumerate(regions): - one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :] + one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + + 0.5), :] return one_list @@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments): return x -def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_size=50, save_dir='./tmp'): +def precompute_normlime_weights(list_data_, + predict_fn, + num_samples=3000, + batch_size=50, + save_dir='./tmp'): # save lime weights and kmeans cluster labels - precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir) + precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, + save_dir) # load precomputed results, compute normlime weights and save. - fname_list = glob.glob(os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples))) + fname_list = glob.glob( + os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples))) return compute_normlime_weights(fname_list, save_dir, num_samples) -def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, cluster_labels, save_path): +def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, + cluster_labels, save_path): lime_weights = {} for label in image_pred_labels: lime_weights[label] = lime_all_weights[label] for_normlime_weights = { - 'lime_weights': lime_weights, # a dict: class_label: (seg_label, weight) + 'lime_weights': + lime_weights, # a dict: class_label: (seg_label, weight) 'cluster': cluster_labels # a list with segments as indices. 
} np.save(save_path, for_normlime_weights) -def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir): +def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, + save_dir): root_path = gen_user_home() root_path = osp.join(root_path, '.paddlex') h_pre_models = osp.join(root_path, "pre_models") @@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav for data_index, each_data_ in enumerate(list_data_): if isinstance(each_data_, str): - save_path = "lime_weights_s{}_{}.npy".format(num_samples, each_data_.split('/')[-1].split('.')[0]) + save_path = "lime_weights_s{}_{}.npy".format( + num_samples, each_data_.split('/')[-1].split('.')[0]) save_path = os.path.join(save_dir, save_path) else: - save_path = "lime_weights_s{}_{}.npy".format(num_samples, data_index) + save_path = "lime_weights_s{}_{}.npy".format(num_samples, + data_index) save_path = os.path.join(save_dir, save_path) if os.path.exists(save_path): - logging.info(save_path + ' exists, not computing this one.', use_color=True) + logging.info( + save_path + ' exists, not computing this one.', use_color=True) continue - img_file_name = each_data_ if isinstance(each_data_, str) else data_index - logging.info('processing '+ img_file_name + ' [{}/{}]'.format(data_index, len(list_data_)), use_color=True) + img_file_name = each_data_ if isinstance(each_data_, + str) else data_index + logging.info( + 'processing ' + img_file_name + ' [{}/{}]'.format(data_index, + len(list_data_)), + use_color=True) image_show = read_image(each_data_) result = predict_fn(image_show) @@ -156,32 +180,38 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav pred_label = pred_label[:top_k] algo = lime_base.LimeImageInterpreter() - interpreter = algo.interpret_instance(image_show[0], predict_fn, pred_label, 0, - num_samples=num_samples, batch_size=batch_size) - - X = get_feature_for_kmeans(compute_features_for_kmeans(image_show).transpose((1, 2, 0)), interpreter.segments) + interpreter = algo.interpret_instance( + image_show[0], + predict_fn, + pred_label, + 0, + num_samples=num_samples, + batch_size=batch_size) + + X = get_feature_for_kmeans( + compute_features_for_kmeans(image_show).transpose((1, 2, 0)), + interpreter.segments) try: cluster_labels = kmeans_model.predict(X) except AttributeError: from sklearn.metrics import pairwise_distances_argmin_min - cluster_labels, _ = pairwise_distances_argmin_min(X, kmeans_model.cluster_centers_) + cluster_labels, _ = pairwise_distances_argmin_min( + X, kmeans_model.cluster_centers_) save_one_lime_predict_and_kmean_labels( - interpreter.local_weights, pred_label, - cluster_labels, - save_path - ) + interpreter.local_weights, pred_label, cluster_labels, save_path) def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples): normlime_weights_all_labels = {} - + for f in a_list_lime_fnames: try: lime_weights_and_cluster = np.load(f, allow_pickle=True).item() lime_weights = lime_weights_and_cluster['lime_weights'] cluster = lime_weights_and_cluster['cluster'] except: - logging.info('When loading precomputed LIME result, skipping' + str(f)) + logging.info('When loading precomputed LIME result, skipping' + + str(f)) continue logging.info('Loading precomputed LIME result,' + str(f)) pred_labels = lime_weights.keys() @@ -203,10 +233,12 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples): for y in normlime_weights_all_labels: normlime_weights = 
normlime_weights_all_labels.get(y, {}) for k in normlime_weights: - normlime_weights[k] = sum(normlime_weights[k]) / len(normlime_weights[k]) + normlime_weights[k] = sum(normlime_weights[k]) / len( + normlime_weights[k]) # check normlime - if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1: + if len(normlime_weights_all_labels.keys()) < max( + normlime_weights_all_labels.keys()) + 1: logging.info( "\n" + \ "Warning: !!! \n" + \ @@ -218,17 +250,166 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples): ) n = 0 - f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n) - while os.path.exists( - os.path.join(save_dir, f_out) - ): + f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format( + lime_num_samples, len(a_list_lime_fnames), n) + while os.path.exists(os.path.join(save_dir, f_out)): n += 1 - f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n) + f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format( + lime_num_samples, len(a_list_lime_fnames), n) continue - np.save( - os.path.join(save_dir, f_out), - normlime_weights_all_labels - ) + np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels) return os.path.join(save_dir, f_out) + +def precompute_global_classifier(dataset, + predict_fn, + save_path, + batch_size=50, + max_num_samples=1000): + from sklearn.linear_model import LogisticRegression + + root_path = gen_user_home() + root_path = osp.join(root_path, '.paddlex') + h_pre_models = osp.join(root_path, "pre_models") + if not osp.exists(h_pre_models): + if not osp.exists(root_path): + os.makedirs(root_path) + url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" + pdx.utils.download_and_decompress(url, path=root_path) + h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl") + kmeans_model = load_kmeans_model(h_pre_models_kmeans) + + image_list = [] + for item in dataset.file_list: + image_list.append(item[0]) + + x_data = [] + y_labels = [] + + num_features = len(kmeans_model.cluster_centers_) + + logging.info( + "Initialization for NormLIME: Computing each sample in the test list.", + use_color=True) + + for each_data_ in tqdm.tqdm(image_list): + x_data_i = np.zeros((num_features)) + image_show = read_image(each_data_) + result = predict_fn(image_show) + result = result[0] # only one image here. 
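+        # Reduce each image to a binary bag-of-clusters vector: the fixed
+        # 10x10 grid below stands in for superpixels, every cell's feature
+        # is assigned to its nearest pretrained KMeans center, and x_data_i
+        # flags which cluster ids occur before the LogisticRegression fit.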
+ c = compute_features_for_kmeans(image_show).transpose((1, 2, 0)) + + segments = np.zeros((image_show.shape[1], image_show.shape[2]), + np.int32) + num_blocks = 10 + height_per_i = segments.shape[0] // num_blocks + 1 + width_per_i = segments.shape[1] // num_blocks + 1 + + for i in range(segments.shape[0]): + for j in range(segments.shape[1]): + segments[i, + j] = i // height_per_i * num_blocks + j // width_per_i + + # segments = quickshift(image_show[0], sigma=1) + X = get_feature_for_kmeans(c, segments) + + try: + cluster_labels = kmeans_model.predict(X) + except AttributeError: + from sklearn.metrics import pairwise_distances_argmin_min + cluster_labels, _ = pairwise_distances_argmin_min( + X, kmeans_model.cluster_centers_) + + for c in cluster_labels: + x_data_i[c] = 1 + + # x_data_i /= len(cluster_labels) + + pred_y_i = np.argmax(result) + y_labels.append(pred_y_i) + x_data.append(x_data_i) + + if len(np.unique(y_labels)) < 2: + logging.info("Warning: The test samples in the dataset is limited.\n \ + NormLIME may have no effect on the results.\n \ + Try to add more test samples, or see the results of LIME.") + num_classes = np.max(np.unique(y_labels)) + 1 + normlime_weights_all_labels = {} + for class_index in range(num_classes): + w = np.ones((num_features)) / num_features + normlime_weights_all_labels[class_index] = { + i: wi + for i, wi in enumerate(w) + } + logging.info("Saving the computed normlime_weights in {}".format( + save_path)) + + np.save(save_path, normlime_weights_all_labels) + return save_path + + clf = LogisticRegression(multi_class='multinomial', max_iter=1000) + clf.fit(x_data, y_labels) + + num_classes = np.max(np.unique(y_labels)) + 1 + normlime_weights_all_labels = {} + + if len(y_labels) / len(np.unique(y_labels)) < 3: + logging.info("Warning: The test samples in the dataset is limited.\n \ + NormLIME may have no effect on the results.\n \ + Try to add more test samples, or see the results of LIME.") + + if len(np.unique(y_labels)) == 2: + # binary: clf.coef_ has shape of [1, num_features] + for class_index in range(num_classes): + if class_index not in clf.classes_: + w = np.ones((num_features)) / num_features + normlime_weights_all_labels[class_index] = { + i: wi + for i, wi in enumerate(w) + } + continue + + if clf.classes_[0] == class_index: + w = -clf.coef_[0] + else: + w = clf.coef_[0] + + # softmax + w = w - np.max(w) + exp_w = np.exp(w * 10) + w = exp_w / np.sum(exp_w) + + normlime_weights_all_labels[class_index] = { + i: wi + for i, wi in enumerate(w) + } + else: + # clf.coef_ has shape of [len(np.unique(y_labels)), num_features] + for class_index in range(num_classes): + if class_index not in clf.classes_: + w = np.ones((num_features)) / num_features + normlime_weights_all_labels[class_index] = { + i: wi + for i, wi in enumerate(w) + } + continue + + coef_class_index = np.where(clf.classes_ == class_index)[0][0] + w = clf.coef_[coef_class_index] + + # softmax + w = w - np.max(w) + exp_w = np.exp(w * 10) + w = exp_w / np.sum(exp_w) + + normlime_weights_all_labels[class_index] = { + i: wi + for i, wi in enumerate(w) + } + + logging.info("Saving the computed normlime_weights in {}".format( + save_path)) + np.save(save_path, normlime_weights_all_labels) + + return save_path diff --git a/paddlex/interpret/interpretation_predict.py b/paddlex/interpret/interpretation_predict.py index 198f949ac7f13117fb51b7240d532eabf1c669eb..31b3b47e86613f62ba1c63b4ba2041357cc6bdc7 100644 --- a/paddlex/interpret/interpretation_predict.py +++ 
b/paddlex/interpret/interpretation_predict.py
@@ -13,17 +13,26 @@
 # limitations under the License.

 import numpy as np
+import cv2
+import copy
+

 def interpretation_predict(model, images):
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    images = images.astype('float32')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
+    tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
+    model.test_transforms.transforms = model.test_transforms.transforms[-2:]
+
     new_imgs = []
     for i in range(images.shape[0]):
-        img = images[i]
-        new_imgs.append(model.test_transforms(img)[0])
+        images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
+        new_imgs.append(model.test_transforms(images[i])[0])
+
+    new_imgs = np.array(new_imgs)
-    result = model.exe.run(
-        model.test_prog,
-        feed={'image': new_imgs},
-        fetch_list=list(model.interpretation_feats.values()))
-    return result
\ No newline at end of file
+    out = model.exe.run(model.test_prog,
+                        feed={'image': new_imgs},
+                        fetch_list=list(model.interpretation_feats.values()))
+
+    model.test_transforms.transforms = tmp_transforms
+
+    return out
diff --git a/paddlex/interpret/visualize.py b/paddlex/interpret/visualize.py
index de8e9151b9417fd3307c74d7bb67767bed1845c7..c1b013d04b9b21a49ecf7eeb6dd65b6d6c578069 100644
--- a/paddlex/interpret/visualize.py
+++ b/paddlex/interpret/visualize.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
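With the float cast, RGB-to-BGR conversion and transform bookkeeping now owned by `interpretation_predict`, the `predict_func` closures defined in visualize.py below reduce to thin wrappers. A minimal sketch of the callable contract the interpreters rely on (`make_predict_fn` is an illustrative name, not part of the package):

```python
import numpy as np

from paddlex.interpret.interpretation_predict import interpretation_predict


def make_predict_fn(model):
    """Wrap a paddlex classifier into the callable LIME/NormLIME expect:
    a batch of HWC RGB images in, per-class probabilities out."""

    def predict_fn(images):
        # images: np.ndarray of shape [N, H, W, 3]; the dtype cast, the
        # RGB->BGR flip and the transform swap all happen inside
        # interpretation_predict.
        out = interpretation_predict(model, images)
        return out[0]  # shape [N, num_classes]

    return predict_fn
```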
import os import cv2 @@ -20,79 +20,79 @@ import numpy as np import paddlex as pdx from .interpretation_predict import interpretation_predict from .core.interpretation import Interpretation -from .core.normlime_base import precompute_normlime_weights +from .core.normlime_base import precompute_global_classifier from .core._session_preparation import gen_user_home - -def lime(img_file, - model, - num_samples=3000, - batch_size=50, - save_dir='./'): - """使用LIME算法将模型预测结果的可解释性可视化。 - + + +def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'): + """使用LIME算法将模型预测结果的可解释性可视化。 + LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME的思想是以输入样本为中心, 在其附近的空间中进行随机采样,每个采样通过原模型得到新的输出,这样得到一系列的输入 和对应的输出,LIME用一个简单的、可解释的模型(比如线性回归模型)来拟合这个映射关系, - 得到每个输入维度的权重,以此来解释模型。 - + 得到每个输入维度的权重,以此来解释模型。 + 注意:LIME可解释性结果可视化目前只支持分类模型。 - + Args: img_file (str): 预测图像路径。 model (paddlex.cv.models): paddlex中的模型。 num_samples (int): LIME用于学习线性模型的采样数,默认为3000。 batch_size (int): 预测数据batch大小,默认为50。 - save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。 + save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。 """ assert model.model_type == 'classifier', \ 'Now the interpretation visualize only be supported in classifier!' if model.status != 'Normal': - raise Exception('The interpretation only can deal with the Normal model') + raise Exception( + 'The interpretation only can deal with the Normal model') if not osp.exists(save_dir): os.makedirs(save_dir) - model.arrange_transforms( - transforms=model.test_transforms, mode='test') + model.arrange_transforms(transforms=model.test_transforms, mode='test') tmp_transforms = copy.deepcopy(model.test_transforms) tmp_transforms.transforms = tmp_transforms.transforms[:-2] img = tmp_transforms(img_file)[0] img = np.around(img).astype('uint8') img = np.expand_dims(img, axis=0) interpreter = None - interpreter = get_lime_interpreter(img, model, num_samples=num_samples, batch_size=batch_size) + interpreter = get_lime_interpreter( + img, model, num_samples=num_samples, batch_size=batch_size) img_name = osp.splitext(osp.split(img_file)[-1])[0] - interpreter.interpret(img, save_dir=save_dir) - - -def normlime(img_file, - model, - dataset=None, - num_samples=3000, - batch_size=50, - save_dir='./'): + interpreter.interpret(img, save_dir=osp.join(save_dir, img_name)) + + +def normlime(img_file, + model, + dataset=None, + num_samples=3000, + batch_size=50, + save_dir='./', + normlime_weights_file=None): """使用NormLIME算法将模型预测结果的可解释性可视化。 - + NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测 试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系。 - + 注意1:dataset读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。 注意2:NormLIME可解释性结果可视化目前只支持分类模型。 - + Args: img_file (str): 预测图像路径。 model (paddlex.cv.models): paddlex中的模型。 dataset (paddlex.datasets): 数据集读取器,默认为None。 num_samples (int): LIME用于学习线性模型的采样数,默认为3000。 batch_size (int): 预测数据batch大小,默认为50。 - save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。 + save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。 + normlime_weights_file (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。 """ assert model.model_type == 'classifier', \ 'Now the interpretation visualize only be supported in classifier!' 
if model.status != 'Normal': - raise Exception('The interpretation only can deal with the Normal model') + raise Exception( + 'The interpretation only can deal with the Normal model') if not osp.exists(save_dir): os.makedirs(save_dir) - model.arrange_transforms( - transforms=model.test_transforms, mode='test') + model.arrange_transforms(transforms=model.test_transforms, mode='test') tmp_transforms = copy.deepcopy(model.test_transforms) tmp_transforms.transforms = tmp_transforms.transforms[:-2] img = tmp_transforms(img_file)[0] @@ -100,52 +100,48 @@ def normlime(img_file, img = np.expand_dims(img, axis=0) interpreter = None if dataset is None: - raise Exception('The dataset is None. Cannot implement this kind of interpretation') - interpreter = get_normlime_interpreter(img, model, dataset, - num_samples=num_samples, batch_size=batch_size, - save_dir=save_dir) + raise Exception( + 'The dataset is None. Cannot implement this kind of interpretation') + interpreter = get_normlime_interpreter( + img, + model, + dataset, + num_samples=num_samples, + batch_size=batch_size, + save_dir=save_dir, + normlime_weights_file=normlime_weights_file) img_name = osp.splitext(osp.split(img_file)[-1])[0] - interpreter.interpret(img, save_dir=save_dir) - - + interpreter.interpret(img, save_dir=osp.join(save_dir, img_name)) + + def get_lime_interpreter(img, model, num_samples=3000, batch_size=50): def predict_func(image): - image = image.astype('float32') - for i in range(image.shape[0]): - image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR) - tmp_transforms = copy.deepcopy(model.test_transforms.transforms) - model.test_transforms.transforms = model.test_transforms.transforms[-2:] out = interpretation_predict(model, image) - model.test_transforms.transforms = tmp_transforms return out[0] + labels_name = None if hasattr(model, 'labels'): labels_name = model.labels - interpreter = Interpretation('lime', - predict_func, - labels_name, - num_samples=num_samples, - batch_size=batch_size) + interpreter = Interpretation( + 'lime', + predict_func, + labels_name, + num_samples=num_samples, + batch_size=batch_size) return interpreter -def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'): - def precompute_predict_func(image): - image = image.astype('float32') - tmp_transforms = copy.deepcopy(model.test_transforms.transforms) - model.test_transforms.transforms = model.test_transforms.transforms[-2:] - out = interpretation_predict(model, image) - model.test_transforms.transforms = tmp_transforms - return out[0] +def get_normlime_interpreter(img, + model, + dataset, + num_samples=3000, + batch_size=50, + save_dir='./', + normlime_weights_file=None): def predict_func(image): - image = image.astype('float32') - for i in range(image.shape[0]): - image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR) - tmp_transforms = copy.deepcopy(model.test_transforms.transforms) - model.test_transforms.transforms = model.test_transforms.transforms[-2:] out = interpretation_predict(model, image) - model.test_transforms.transforms = tmp_transforms return out[0] + labels_name = None if dataset is not None: labels_name = dataset.labels @@ -157,28 +153,29 @@ def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=5 os.makedirs(root_path) url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" pdx.utils.download_and_decompress(url, path=root_path) - npy_dir = precompute_for_normlime(precompute_predict_func, - dataset, - num_samples=num_samples, - batch_size=batch_size, - 
save_dir=save_dir) - interpreter = Interpretation('normlime', - predict_func, - labels_name, - num_samples=num_samples, - batch_size=batch_size, - normlime_weights=npy_dir) - return interpreter - -def precompute_for_normlime(predict_func, dataset, num_samples=3000, batch_size=50, save_dir='./'): - image_list = [] - for item in dataset.file_list: - image_list.append(item[0]) - return precompute_normlime_weights( - image_list, + if osp.exists(osp.join(save_dir, normlime_weights_file)): + normlime_weights_file = osp.join(save_dir, normlime_weights_file) + try: + np.load(normlime_weights_file, allow_pickle=True).item() + except: + normlime_weights_file = precompute_global_classifier( + dataset, + predict_func, + save_path=normlime_weights_file, + batch_size=batch_size) + else: + normlime_weights_file = precompute_global_classifier( + dataset, predict_func, - num_samples=num_samples, - batch_size=batch_size, - save_dir=save_dir) - + save_path=osp.join(save_dir, normlime_weights_file), + batch_size=batch_size) + + interpreter = Interpretation( + 'normlime', + predict_func, + labels_name, + num_samples=num_samples, + batch_size=batch_size, + normlime_weights=normlime_weights_file) + return interpreter diff --git a/paddlex/utils/__init__.py b/paddlex/utils/__init__.py index ff774c985feb6ffc24a3e8c67237cdff0a074ee4..19c86d754b9b99219fdbf7be4b5e7fa6cffe6346 100644 --- a/paddlex/utils/__init__.py +++ b/paddlex/utils/__init__.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from . 
import logging
diff --git a/paddlex/utils/logging.py b/paddlex/utils/logging.py
index c118a28782528b727bf9af4591d07714cddae6ae..adfcea515273286f37921ec13999fb2234ce404f 100644
--- a/paddlex/utils/logging.py
+++ b/paddlex/utils/logging.py
@@ -29,13 +29,11 @@ def log(level=2, message="", use_color=False):
     current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
     if paddlex.log_level >= level:
         if use_color:
-            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
-                current_time, levels[level],
-                message).encode("utf-8").decode("latin1"))
+            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
+                level], message).encode("utf-8").decode("latin1"))
         else:
-            print(
-                "{} [{}]\t{}".format(current_time, levels[level],
-                                     message).encode("utf-8").decode("latin1"))
+            print("{} [{}]\t{}".format(current_time, levels[level], message)
+                  .encode("utf-8").decode("latin1"))
         sys.stdout.flush()


@@ -51,6 +49,7 @@ def warning(message="", use_color=True):
     log(level=1, message=message, use_color=use_color)


-def error(message="", use_color=True):
+def error(message="", use_color=True, exit=True):
     log(level=0, message=message, use_color=use_color)
-    sys.exit(-1)
+    if exit:
+        sys.exit(-1)
diff --git a/setup.py b/setup.py
index db62ca5e9e8107f2f32e804a0e92fb48766d3c27..44aca0f9dc2a214ff4bcf4e2817d06423c26812b 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit

 setuptools.setup(
     name="paddlex",
-    version='1.0.5',
+    version='1.0.6',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,
diff --git a/tutorials/interpret/normlime.py b/tutorials/interpret/normlime.py
index 3e501388e44aeab8548ae123831bc3211b08cea7..f3a1129780ab87d6d242010a124760c9a64608bd 100644
--- a/tutorials/interpret/normlime.py
+++ b/tutorials/interpret/normlime.py
@@ -14,18 +14,22 @@ model_file = 'https://bj.bcebos.com/paddlex/interpret/mini_imagenet_veg_mobilene
 pdx.utils.download_and_decompress(model_file, path='./')

 # Load the model
-model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
+model_file = 'mini_imagenet_veg_mobilenetv2'
+model = pdx.load_model(model_file)

 # Define the dataset used for testing
+dataset = 'mini_imagenet_veg'
 test_dataset = pdx.datasets.ImageNet(
-    data_dir='mini_imagenet_veg',
-    file_list=osp.join('mini_imagenet_veg', 'test_list.txt'),
-    label_list=osp.join('mini_imagenet_veg', 'labels.txt'),
+    data_dir=dataset,
+    file_list=osp.join(dataset, 'test_list.txt'),
+    label_list=osp.join(dataset, 'labels.txt'),
     transforms=model.test_transforms)

 # Visualize the interpretation results
 pdx.interpret.normlime(
-    'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
-    model,
-    test_dataset,
-    save_dir='./')
+    test_dataset.file_list[0][0],
+    model,
+    test_dataset,
+    save_dir='./',
+    normlime_weights_file='{}_{}.npy'.format(
+        dataset.split('/')[-1], model.model_name))
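For completeness, the LIME entry point follows the same reworked saving convention: results are written whenever `save_dir` is given, with the image name used as a file-name prefix by the new `save_fig` (here roughly `./n07734744_1106_lime_s3000.png`). A usage sketch mirroring the tutorial above, with the same downloaded model and dataset:

```python
import paddlex as pdx

model = pdx.load_model('mini_imagenet_veg_mobilenetv2')

# Interprets a single prediction with LIME; the figure is both shown
# and saved because save_dir is provided.
pdx.interpret.lime(
    'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
    model,
    num_samples=3000,
    batch_size=50,
    save_dir='./')
```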