diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a750a0aef715e1c89d6dfe7de51f41b8918950f..6c881969b76d907ca804b0e73a0dc913c56d2bee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,6 +35,6 @@ - id: cpplint-cpp-source name: cpplint description: Check C++ code style using cpplint.py. - entry: bash cpplint_pre_commit.hook + entry: bash ./tools/codestyle/cpplint_pre_commit.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ diff --git a/deploy/cpp/include/paddlex/results.h b/deploy/cpp/include/paddlex/results.h index de90c4a85130f42c0201f0d671fd3e2d53b0f37d..1643c9249e8e8e993017c7702d1d490352c2d9a8 100644 --- a/deploy/cpp/include/paddlex/results.h +++ b/deploy/cpp/include/paddlex/results.h @@ -63,9 +63,10 @@ class SegResult : public BaseResult { public: Mask label_map; Mask score_map; + std::string type = "seg"; void clear() { label_map.clear(); score_map.clear(); } }; -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp index fb7c12c2e47b6bdc030ccef36bac1277e021436e..b3e292c23e781d675ad7e23512fe96672d4b8121 100644 --- a/deploy/cpp/src/paddlex.cpp +++ b/deploy/cpp/src/paddlex.cpp @@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) { YAML::Node config = YAML::LoadFile(yaml_file); type = config["_Attributes"]["model_type"].as(); name = config["Model"].as(); + std::string version = config["version"].as(); + if (version[0] == '0') { + std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment " + << "cannot be done, please refer to " + << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md " + << "to transfer version." 
+ << std::endl; + return false; + } bool to_rgb = true; if (config["TransformsMode"].IsDefined()) { std::string mode = config["TransformsMode"].as(); diff --git a/deploy/lite/export_lite.py b/deploy/lite/export_lite.py index b56aee9ee803b943473178b67f38c0f8d41a00da..0286d8733868dfbbaceadbfcf7d6728e367341df 100644 --- a/deploy/lite/export_lite.py +++ b/deploy/lite/export_lite.py @@ -19,30 +19,30 @@ import argparse def export_lite(): opt = lite.Opt() - model_file = os.path.join(FLAGS.model_path, '__model__') - params_file = os.path.join(FLAGS.model_path, '__params__') - opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_dir) + model_file = os.path.join(FLAGS.model_dir, '__model__') + params_file = os.path.join(FLAGS.model_dir, '__params__') + opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file) if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--model_path", + "--model_dir", type=str, default="", - help="model path.", + help="path of '__model__' and '__params__'.", required=True) parser.add_argument( "--place", type=str, default="arm", - help="preprocess config path.", + help="run place: 'arm|opencl|x86|npu|xpu|rknpu|apu'.", required=True) parser.add_argument( - "--save_dir", + "--save_file", type=str, default="paddlex.onnx", - help="Directory for storing the output visualization files.", + help="file name for storing the output files.", required=True) FLAGS = parser.parse_args() export_lite() diff --git a/docs/apis/deploy.md b/docs/apis/deploy.md index ad97dc23957104cebc7e487d38554cb06ced57f2..4ddc0d90a5a2ac17ab9c9154bddf2421489fd8a7 100755 --- a/docs/apis/deploy.md +++ b/docs/apis/deploy.md @@ -1,6 +1,6 @@ -# Predictor部署-paddlex.deploy +# 预测部署-paddlex.deploy -使用AnalysisPredictor进行预测部署。 +使用Paddle Inference进行高性能的Python预测部署。更多关于Paddle Inference信息请参考[Paddle Inference文档](https://paddle-inference.readthedocs.io/en/latest/#) ## Predictor类 @@ -22,6 +22,7 @@ 
paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_ > > > > ``` > > import paddlex +> > > > model = paddlex.deploy.Predictor(model_dir, use_gpu=True) > > result = model.predict(image_file) > > ``` diff --git a/docs/apis/transforms/augment.md b/docs/apis/transforms/augment.md index bc37d4853034f42361edd23e2719c119cda5a9bb..4ed04ca064cad113a2375dc3375d651572b374c1 100644 --- a/docs/apis/transforms/augment.md +++ b/docs/apis/transforms/augment.md @@ -9,7 +9,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 | 任务类型 | 增强方法 | | :------- | :------------| | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、
[RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) | -|目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、RandomExpand](det_transforms.html#randomexpand) | +|目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) | |语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | ## imgaug增强库的支持 diff --git a/docs/apis/transforms/cls_transforms.md b/docs/apis/transforms/cls_transforms.md index ccffc30292c7b5dea81ba6fe62a42a1810203ca9..9b762a79606f43d6672eeb0ea6d413621ff069bd 100755 --- a/docs/apis/transforms/cls_transforms.md +++ b/docs/apis/transforms/cls_transforms.md @@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms) ## RandomCrop类 ```python -paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3. / 4, upper_ratio=4. / 3) +paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3) ``` 对图像进行随机剪裁,模型训练时的数据增强操作。 @@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3 ### 参数 * **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。 -* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。 +* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。 * **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。 * **upper_ratio** (float): 宽变换比例的最小限制。默认为4. / 3。 diff --git a/docs/appendix/index.rst b/docs/appendix/index.rst index c3198420d394372a359eba151fd58812d635c58a..c402384ebc307713ed87055dc86cab58dcf33bbe 100755 --- a/docs/appendix/index.rst +++ b/docs/appendix/index.rst @@ -8,6 +8,7 @@ model_zoo.md metrics.md + interpret.md parameters.md how_to_convert_dataset.md datasets.md diff --git a/docs/appendix/interpret.md b/docs/appendix/interpret.md new file mode 100644 index 0000000000000000000000000000000000000000..886620df2fa98c03abda4717dea627277715b2d9 --- /dev/null +++ b/docs/appendix/interpret.md @@ -0,0 +1,31 @@ +# PaddleX可解释性 + +目前深度学习模型普遍存在一个问题,因为使用模型预测还是一个黑盒,几乎无法去感知它的内部工作状态,预测结果的可信度一直遭到质疑。为此,PaddleX提供了2种对图像分类预测结果进行可解释性研究的算法:LIME和NormLIME。 + +## LIME +LIME全称Local interpretable model-agnostic explanations,表示一种与模型无关的局部可解释性。其实现步骤主要如下: +1. 获取图像的超像素。 +2. 
以输入样本为中心,在其附近的空间中进行随机采样,每个采样即对对象中的超像素进行随机遮掩(每个采样的权重和该采样与原样本的距离成反比)。 +3. 每个采样通过预测模型得到新的输出,这样得到一系列的输入`X`和对应的输出`Y`。 +4. 将`X`转换为超像素特征`F`,用一个简单的、可解释的模型`Model`(这里使用岭回归)来拟合`F`和`Y`的映射关系。 +5. `Model`将得到`F`每个输入维度的权重(每个维度代表一个超像素),以此来解释模型。 + +LIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/lime.py)和[api介绍](../apis/visualize.html#lime)。在使用时,参数中的`num_samples`设置尤为重要,其表示上述步骤2中的随机采样的个数,若设置过小会影响可解释性结果的稳定性,若设置过大则将在上述步骤3耗费较长时间;参数`batch_size`则表示在计算上述步骤3时,预测的batch size,若设置过小将在上述步骤3耗费较长时间,而上限则根据机器配置决定。 + +最终LIME可解释性算法的可视化结果如下所示: +![](../images/lime.png) +图中绿色区域代表起正向作用的超像素,红色区域代表起反向作用的超像素,"First n superpixels"代表前n个权重比较大的超像素(由上述步骤5计算所得结果)。 + + +## NormLIME +NormLIME是在LIME上的改进,LIME的解释是局部性的,是针对当前样本给的特定解释,而NormLIME是利用一定数量的样本对当前样本的一个全局性的解释,有一定的降噪效果。其实现步骤如下所示: +1. 下载Kmeans模型参数和ResNet50_vc网络前三层参数。(ResNet50_vc的参数是在ImageNet上训练所得网络的参数;使用ImageNet图像作为数据集,每张图像从ResNet50_vc的第三层输出提取对应超象素位置上的平均特征和质心上的特征,训练将得到此处的Kmeans模型) +2. 计算测试集中每张图像的LIME结果。(如无测试集,可用验证集代替) +3. 使用Kmeans模型对所有图像中的所有像素进行聚类。 +4. 
对在同一个簇的超像素(相同的特征)进行权重的归一化,得到每个超像素的权重,以此来解释模型。 + +NormLIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)和[api介绍](../apis/visualize.html#normlime)。在使用时,参数中的`num_samples`设置尤为重要,其表示上述步骤2中的随机采样的个数,若设置过小会影响可解释性结果的稳定性,若设置过大则将在上述步骤3耗费较长时间;参数`batch_size`则表示在计算上述步骤3时,预测的batch size,若设置过小将在上述步骤3耗费较长时间,而上限则根据机器配置决定;而`dataset`则是由测试集或验证集构造的数据。 + +最终NormLIME可解释性算法的可视化结果如下所示: +![](../images/normlime.png) +图中绿色区域代表起正向作用的超像素,红色区域代表起反向作用的超像素,"First n superpixels"代表前n个权重比较大的超像素(由上述步骤4计算所得结果)。图中最后一行代表把LIME和NormLIME对应超像素权重相乘的结果。 \ No newline at end of file diff --git a/docs/images/lime.png b/docs/images/lime.png new file mode 100644 index 0000000000000000000000000000000000000000..de435a2e2375a788319f0d80a4cce7a21d395e41 Binary files /dev/null and b/docs/images/lime.png differ diff --git a/docs/images/normlime.png b/docs/images/normlime.png new file mode 100644 index 0000000000000000000000000000000000000000..4e5099347f261d3f5ce47b93d28cfa484c1d3776 Binary files /dev/null and b/docs/images/normlime.png differ diff --git a/docs/index.rst b/docs/index.rst index c3276f301bcd54846ba3639441572a26a5dd0f6a..0f876e6337ce4ea181b7558a5654808774f50572 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,7 +32,7 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习 * PaddleX版本: v1.0.0 * 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex -* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop +* 项目GitHub: https://github.com/PaddlePaddle/PaddleX * 官方QQ用户群: 1045148026 * GitHub Issue反馈: http://www.github.com/PaddlePaddle/PaddleX/issues diff --git a/docs/quick_start.md b/docs/quick_start.md index dafb902fc66a83de4f5aa6066dd6094b1baa4b86..a24cdadf410abd738750a18b4f5e99b8265cb7d3 100644 --- a/docs/quick_start.md +++ b/docs/quick_start.md @@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet( 本文档中使用百度基于蒸馏方法得到的MobileNetV3预训练模型,模型结构与MobileNetV3一致,但精度更高。PaddleX内置了20多种分类模型,查阅[PaddleX模型库](appendix/model_zoo.md)了解更多分类模型。 ``` num_classes = len(train_dataset.labels) 
-model.pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes) +model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes) ``` ### 3.4 定义训练参数 @@ -86,7 +86,7 @@ python train.py ## 5. 训练过程中查看训练指标 模型在训练过程中,所有的迭代信息将以标注输出流的形式,输出到命令执行的终端上,用户也可通过visualdl以可视化的方式查看训练指标的变化,通过如下方式启动visualdl后,在浏览器打开https://0.0.0.0:8001 (或 https://localhost:8001)即可。 ``` -visualdl --logdir output/mobilenetv2/vdl_log --port 8000 +visualdl --logdir output/mobilenetv2/vdl_log --port 8001 ``` ![](./images/vdl1.jpg) diff --git a/docs/tutorials/dataset_prepare.md b/docs/tutorials/dataset_prepare.md index 87f368bc29d80aff12b9f6597a1d3901b427c4d7..95d1094c216857d4dc708cf39be74dca98d78f59 100644 --- a/docs/tutorials/dataset_prepare.md +++ b/docs/tutorials/dataset_prepare.md @@ -1,7 +1,3 @@ # 数据准备 -## 数据标注 - -## 主流标注软件支持 - -## EasyData数据标注支持 +该部分内容已迁移至[附录](../appendix/datasets.md) diff --git a/docs/tutorials/deploy/deploy_lite.md b/docs/tutorials/deploy/deploy_lite.md index 392e945dea2465ca4f6f40f2a131f7cad19db03a..5419aed636545b95e9f98fdd45109592b7a6d9d6 100644 --- a/docs/tutorials/deploy/deploy_lite.md +++ b/docs/tutorials/deploy/deploy_lite.md @@ -1,5 +1,12 @@ # 移动端部署 +PaddleX的移动端部署由PaddleLite实现,部署的流程如下,首先将训练好的模型导出为inference model,然后使用PaddleLite的python接口对模型进行优化,最后使用PaddleLite的预测库进行部署, +PaddleLite的详细介绍和使用可参考:[PaddleLite文档](https://paddle-lite.readthedocs.io/zh/latest/) + +> PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference + +以下介绍如何将PaddleX导出为inference model,然后使用PaddleLite的OPT模块对模型进行优化: + step 1: 安装PaddleLite ``` @@ -9,14 +16,21 @@ pip install paddlelite step 2: 将PaddleX模型导出为inference模型 参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](./upgrade_version.md)对模型版本进行升级。** step 3: 将inference模型转换成PaddleLite模型 ``` -python /path/to/PaddleX/deploy/lite/export_lite.py --model_path /path/to/inference_model --save_dir /path/to/onnx_model +python /path/to/PaddleX/deploy/lite/export_lite.py 
--model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run + ``` -`--model_path`用于指定inference模型的路径,`--save_dir`用于指定Lite模型的保存路径。 +| 参数 | 说明 | +| ---- | ---- | +| model_dir | 预测模型所在路径,包含"__model__", "__params__"文件 | +| save_file | 模型输出的名称,默认为"paddlex.nb" | +| place | 运行的平台,可选:arm|opencl|x86|npu|xpu|rknpu|apu | + step 4: 预测 diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md index b4edf3510ae992d72ea60e1078f22e12d54357c2..ebd27b61559eb2e0e4146f3642b2637cb6ab70e2 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md @@ -104,7 +104,8 @@ make ### Step5: 预测及可视化 -参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 +参考[导出inference模型](../../deploy_python.html#inference)将模型导出为inference格式模型。 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifer`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md index 2f7c62766291410ec8e48a77b7e814edeb1523bb..0f70f7f90131bfb354b2eee493b6d863b99f3dcc 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md @@ -100,6 +100,7 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens ### Step5: 预测及可视化 参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: diff --git a/docs/tutorials/deploy/deploy_server/deploy_python.md 
b/docs/tutorials/deploy/deploy_server/deploy_python.md index c597f87cdbbc208ad2b72a8305642da41b9be5cd..321d48077fd0478234e8ce6386c7355c36d1c63c 100644 --- a/docs/tutorials/deploy/deploy_server/deploy_python.md +++ b/docs/tutorials/deploy/deploy_server/deploy_python.md @@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer ``` ## 预测部署 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。** + > 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz) ``` diff --git a/docs/tutorials/deploy/deploy_server/encryption.md b/docs/tutorials/deploy/deploy_server/encryption.md index 71b07048ed8372b4c4b9aa0b2082dc9ed9f2f9a0..7090421823bb3bbe2017818a3fc2f7e96608dda9 100644 --- a/docs/tutorials/deploy/deploy_server/encryption.md +++ b/docs/tutorials/deploy/deploy_server/encryption.md @@ -61,7 +61,7 @@ paddlex-encryption ./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model ``` -`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` +`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`(**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` ![](../images/encrypt.png) diff --git a/docs/tutorials/deploy/upgrade_version.md b/docs/tutorials/deploy/upgrade_version.md new file mode 100644 index 
0000000000000000000000000000000000000000..aac33928448d75bf6965fbadbd4ff114e6156196 --- /dev/null +++ b/docs/tutorials/deploy/upgrade_version.md @@ -0,0 +1,14 @@ +# 模型版本升级 + +由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,用户需要按照以下步骤对模型版本进行转换,转换后的模型可以在多端上完成部署。 + +## 检查模型版本 + +存放模型的文件夹存有一个`model.yml`文件,该文件的最后一行`version`值表示模型的版本号,若版本号小于1.0.0,则需要进行版本转换,若版本号大于及等于1.0.0,则不需要进行版本转换。 + +## 版本转换 + +``` +paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=/path/to/high_version_model +``` +`--model_dir`为版本号小于1.0.0的模型路径,可以是PaddleX训练过程保存的模型,也可以是导出为inference格式的模型。`--save_dir`为转换为高版本的模型,后续可用于多端部署。 \ No newline at end of file diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py index 8633fe5305929c5238523a067af172720156d3d5..45335084a9f8ddd9b850b52f3a6db9fe6886a153 100644 --- a/paddlex/cv/datasets/voc.py +++ b/paddlex/cv/datasets/voc.py @@ -113,7 +113,7 @@ class VOCDetection(Dataset): is_crowd = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32) for i, obj in enumerate(objs): - cname = obj.find('name').text + cname = obj.find('name').text.strip() gt_class[i][0] = cname2cid[cname] _difficult = int(obj.find('difficult').text) x1 = float(obj.find('bndbox').find('xmin').text) diff --git a/paddlex/cv/models/slim/prune.py b/paddlex/cv/models/slim/prune.py index 5a8c5189347fd935db0cc2a54eee9dea2cf7a6cb..810679d3d7cf70a14922a594af3468294f12d29c 100644 --- a/paddlex/cv/models/slim/prune.py +++ b/paddlex/cv/models/slim/prune.py @@ -42,7 +42,7 @@ def sensitivity(program, if pruned_ratios is None: pruned_ratios = np.arange(0.1, 1, step=0.1) - total_evaluate_iters = 1 + total_evaluate_iters = 0 for name in param_names: if name not in sensitivities: sensitivities[name] = {} @@ -52,12 +52,6 @@ def sensitivity(program, len(list(pruned_ratios)) - len(sensitivities[name])) eta = '-' start_time = time.time() - progress = 1.0 / total_evaluate_iters - progress = "%.2f%%" % (progress * 100) - logging.info( - "Total evaluate 
iters={}, current={}, progress={}, eta={}".format( - total_evaluate_iters, 1, progress, eta), - use_color=True) baseline = eval_func(graph.program) cost = time.time() - start_time eta = cost * (total_evaluate_iters - 1) @@ -73,7 +67,7 @@ def sensitivity(program, logging.info( "Total evaluate iters={}, current={}, progress={}, eta={}". format( - total_evaluate_iters, current_iter+1, progress, + total_evaluate_iters, current_iter, progress, seconds_to_hms( int(cost * (total_evaluate_iters - current_iter)))), use_color=True) diff --git a/paddlex/cv/models/slim/visualize.py b/paddlex/cv/models/slim/visualize.py index 083b177ea8c8878562070df3b617b32248046fea..79e885a9f9a51ff86fa24f73e12c9dbc869e0acc 100644 --- a/paddlex/cv/models/slim/visualize.py +++ b/paddlex/cv/models/slim/visualize.py @@ -50,7 +50,7 @@ def visualize(model, sensitivities_file, save_dir='./'): min(np.array(x)) - 0.01, max(np.array(x)) + 0.01, 0.05) my_y_ticks = np.arange(0.05, 1, 0.05) - plt.xticks(my_x_ticks, rotation=30, fontsize=8) + plt.xticks(my_x_ticks, rotation=15, fontsize=8) plt.yticks(my_y_ticks, fontsize=8) for a, b in zip(x, y): plt.text( diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py index 55f58fcbefd7bc9a8e426aa3aa100dc0807f0f71..bcb8e6d38de9becacce4d80e2ff54588c15352f4 100644 --- a/paddlex/cv/transforms/cls_transforms.py +++ b/paddlex/cv/transforms/cls_transforms.py @@ -103,14 +103,14 @@ class RandomCrop(ClsTransform): Args: crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。 - lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。 + lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。 lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。 upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。 """ def __init__(self, crop_size=224, - lower_scale=0.88, + lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. 
/ 3): self.crop_size = crop_size diff --git a/paddlex/cv/transforms/imgaug_support.py b/paddlex/cv/transforms/imgaug_support.py index 3924acd1821fc9fc4f85b8c486e4f2a20f4e8543..edaaba958d7501861ae36eac3dab8900af1ddb8f 100644 --- a/paddlex/cv/transforms/imgaug_support.py +++ b/paddlex/cv/transforms/imgaug_support.py @@ -23,7 +23,7 @@ def execute_imgaug(augmenter, im, bboxes=None, polygons=None, import imgaug.augmentables.bbs as bbs aug_im = im.astype('uint8') - aug_im = augmenter.augment(image=aug_im) + aug_im = augmenter.augment(image=aug_im).astype('float32') return aug_im # TODO imgaug的标注处理逻辑与paddlex已存的transform存在部分差异 diff --git a/paddlex/interpret/as_data_reader/readers.py b/paddlex/interpret/as_data_reader/readers.py index 5cd76c6400327637121b16ac585d4ac052ab74d7..d9244f17fa0a93f26589b29471ade59843b7d454 100644 --- a/paddlex/interpret/as_data_reader/readers.py +++ b/paddlex/interpret/as_data_reader/readers.py @@ -20,6 +20,7 @@ import six import glob from .data_path_utils import _find_classes from PIL import Image +import paddlex.utils.logging as logging def resize_short(img, target_size, interpolation=None): @@ -117,7 +118,7 @@ def read_image(img_path, target_size=256, crop_size=224): assert len(img_path.shape) == 4 return img_path else: - ValueError(f"Not recognized data type {type(img_path)}.") + ValueError("Not recognized data type {}.".format(type(img_path))) class ReaderConfig(object): @@ -156,7 +157,7 @@ class ReaderConfig(object): img = cv2.imread(img_path) if img is None: - print(img_path) + logging.info(img_path) continue img = resize_short(img, target_size, interpolation=None) img = crop_image(img, crop_size, center=self.is_test) @@ -208,7 +209,7 @@ def create_reader(list_image_path, list_label=None, is_test=False): img = cv2.imread(img_path) if img is None: - print(img_path) + logging.info(img_path) continue img = resize_short(img, target_size, interpolation=None) diff --git a/paddlex/interpret/core/interpretation_algorithms.py 
b/paddlex/interpret/core/interpretation_algorithms.py index 507e47bf372ab6ce81fbf9db4062cc5dae3a0b74..afcea8d2d92531590a1aef986014c5bfd792ea5e 100644 --- a/paddlex/interpret/core/interpretation_algorithms.py +++ b/paddlex/interpret/core/interpretation_algorithms.py @@ -21,6 +21,7 @@ from . import lime_base from ._session_preparation import paddle_get_fc_weights, compute_features_for_kmeans, gen_user_home from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, load_kmeans_model from paddlex.interpret.as_data_reader.readers import read_image +import paddlex.utils.logging as logging import cv2 @@ -71,7 +72,8 @@ class CAM(object): if self.label_names is not None: ln = self.label_names[l] - print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}') + prob_str = "%.3f" % (probability[pred_label[0]]) + logging.info("predicted result: {} with probability {}.".format(ln, prob_str)) return feature_maps, fc_weights def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): @@ -96,7 +98,8 @@ class CAM(object): ax.axis("off") axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) axes[1].imshow(cam) axes[1].set_title("CAM") @@ -157,14 +160,15 @@ class LIME(object): if self.label_names is not None: ln = self.label_names[l] - print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}') + prob_str = "%.3f" % (probability[pred_label[0]]) + logging.info("predicted result: {} with probability {}.".format(ln, prob_str)) end = time.time() algo = lime_base.LimeImageInterpreter() interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0, num_samples=self.num_samples, batch_size=self.batch_size) self.lime_interpreter = interpreter - print('lime time: ', time.time() - end, 's.') + 
logging.info('lime time: ' + str(time.time() - end) + 's.') def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): if self.lime_interpreter is None: @@ -189,7 +193,8 @@ class LIME(object): ax.axis("off") axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") @@ -201,7 +206,7 @@ class LIME(object): l, positive_only=False, hide_rest=False, num_features=num_to_show ) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title(f"label {ln}, first {num_to_show} superpixels") + axes[ncols + i].set_title("label {}, first {} superpixels".format(ln, num_to_show)) if save_to_disk and save_outdir is not None: os.makedirs(save_outdir, exist_ok=True) @@ -232,8 +237,9 @@ class NormLIME(object): raise ValueError("NormLIME needs the KMeans model, where we provided a default one in " "pre_models/kmeans_model.pkl.") else: - print("Warning: It is *strongly* suggested to use the default KMeans model in pre_models/kmeans_model.pkl. " - "Use another one will change the final result.") + logging.debug("Warning: It is *strongly* suggested to use the \ + default KMeans model in pre_models/kmeans_model.pkl. 
\ + Use another one will change the final result.") self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime) self.num_samples = num_samples @@ -243,7 +249,7 @@ class NormLIME(object): self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item() except: self.normlime_weights = None - print("Warning: not find the correct precomputed Normlime result.") + logging.debug("Warning: not find the correct precomputed Normlime result.") self.predict_fn = predict_fn @@ -289,8 +295,7 @@ class NormLIME(object): self.predicted_probability = self._lime.predicted_probability self.image = image_show[0] self.labels = self._lime.labels - # print(f'predicted result: {self.predicted_label} with probability {self.predicted_probability: .3f}') - print('performing NormLIME operations ...') + logging.info('performing NormLIME operations ...') cluster_labels = self.predict_cluster_labels( compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments @@ -329,7 +334,8 @@ class NormLIME(object): axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") @@ -342,7 +348,7 @@ class NormLIME(object): l, positive_only=False, hide_rest=False, num_features=num_to_show ) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title(f"LIME: first {num_to_show} superpixels") + axes[ncols + i].set_title("LIME: first {} superpixels".format(num_to_show)) # NormLIME visualization self._lime.lime_interpreter.local_weights = g_weights @@ -351,7 +357,7 @@ class NormLIME(object): l, positive_only=False, hide_rest=False, num_features=num_to_show ) axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 2 + 
i].set_title(f"NormLIME: first {num_to_show} superpixels") + axes[ncols * 2 + i].set_title("NormLIME: first {} superpixels".format(num_to_show)) # NormLIME*LIME visualization combined_weights = combine_normlime_and_lime(lime_weights, g_weights) @@ -361,7 +367,7 @@ class NormLIME(object): l, positive_only=False, hide_rest=False, num_features=num_to_show ) axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 3 + i].set_title(f"Combined: first {num_to_show} superpixels") + axes[ncols * 3 + i].set_title("Combined: first {} superpixels".format(num_to_show)) self._lime.lime_interpreter.local_weights = lime_weights @@ -433,32 +439,32 @@ def save_fig(data_, save_outdir, algorithm_name, num_samples=3000): import matplotlib.pyplot as plt if isinstance(data_, str): if algorithm_name == 'cam': - f_out = f"{algorithm_name}_{data_.split('/')[-1]}.png" + f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1]) else: - f_out = f"{algorithm_name}_{data_.split('/')[-1]}_s{num_samples}.png" + f_out = "{}_{}_s{}.png".format(algorithm_name, data_.split('/')[-1], num_samples) plt.savefig( os.path.join(save_outdir, f_out) ) else: n = 0 if algorithm_name == 'cam': - f_out = f'cam-{n}.png' + f_out = 'cam-{}.png'.format(n) else: - f_out = f'{algorithm_name}_s{num_samples}-{n}.png' + f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n) while os.path.exists( os.path.join(save_outdir, f_out) ): n += 1 if algorithm_name == 'cam': - f_out = f'cam-{n}.png' + f_out = 'cam-{}.png'.format(n) else: - f_out = f'{algorithm_name}_s{num_samples}-{n}.png' + f_out = '{}_s{}-{}.png'.format(algorithm_name, num_samples, n) continue plt.savefig( os.path.join( save_outdir, f_out ) ) - print('The image of intrepretation result save in {}'.format(os.path.join( + logging.info('The image of intrepretation result save in {}'.format(os.path.join( save_outdir, f_out ))) diff --git a/paddlex/interpret/core/lime_base.py b/paddlex/interpret/core/lime_base.py index 
23969b91fc29a1324fff1a48a41de3b7c2450d8c..3d3bd96d0e7b5ffb0de2d2f8156a03021cfad312 100644 --- a/paddlex/interpret/core/lime_base.py +++ b/paddlex/interpret/core/lime_base.py @@ -34,6 +34,7 @@ import scipy as sp import tqdm import copy from functools import partial +import paddlex.utils.logging as logging class LimeBase(object): @@ -230,9 +231,9 @@ class LimeBase(object): local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1)) if self.verbose: - print('Intercept', easy_model.intercept_) - print('Prediction_local', local_pred,) - print('Right:', neighborhood_labels[0, label]) + logging.info('Intercept' + str(easy_model.intercept_)) + logging.info('Prediction_local' + str(local_pred)) + logging.info('Right:' + str(neighborhood_labels[0, label])) return (easy_model.intercept_, sorted(zip(used_features, easy_model.coef_), key=lambda x: np.abs(x[1]), reverse=True), @@ -451,7 +452,6 @@ class LimeImageInterpreter(object): d = cdist(centroids, centroids, 'sqeuclidean') for x in np.unique(segments): - # print(np.argmin(d[x])) a = [image[segments == i] for i in np.argsort(d[x])[1:6]] mx = np.mean(np.concatenate(a), axis=0) fudged_image[segments == x] = mx diff --git a/paddlex/interpret/core/normlime_base.py b/paddlex/interpret/core/normlime_base.py index ca7b79c31cff02c018838d31e337b9c65762a7f6..3b3a94212ded51d1300b9ae78f4cdab0e1589903 100644 --- a/paddlex/interpret/core/normlime_base.py +++ b/paddlex/interpret/core/normlime_base.py @@ -21,6 +21,7 @@ from paddlex.interpret.as_data_reader.readers import read_image import paddlex.utils.logging as logging from . 
import lime_base from ._session_preparation import compute_features_for_kmeans, gen_user_home +import paddlex.utils.logging as logging def load_kmeans_model(fname): @@ -67,7 +68,6 @@ def centroid_using_superpixels(features, segments): one_list = np.zeros((len(np.unique(segments)), features.shape[2])) for i, r in enumerate(regions): one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :] - # print(one_list.shape) return one_list @@ -85,7 +85,7 @@ def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_ precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir) # load precomputed results, compute normlime weights and save. - fname_list = glob.glob(os.path.join(save_dir, f'lime_weights_s{num_samples}*.npy')) + fname_list = glob.glob(os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples))) return compute_normlime_weights(fname_list, save_dir, num_samples) @@ -117,10 +117,10 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav for data_index, each_data_ in enumerate(list_data_): if isinstance(each_data_, str): - save_path = f"lime_weights_s{num_samples}_{each_data_.split('/')[-1].split('.')[0]}.npy" + save_path = "lime_weights_s{}_{}.npy".format(num_samples, each_data_.split('/')[-1].split('.')[0]) save_path = os.path.join(save_dir, save_path) else: - save_path = f"lime_weights_s{num_samples}_{data_index}.npy" + save_path = "lime_weights_s{}_{}.npy".format(num_samples, data_index) save_path = os.path.join(save_dir, save_path) if os.path.exists(save_path): @@ -174,16 +174,16 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples): normlime_weights_all_labels = {} + for f in a_list_lime_fnames: try: lime_weights_and_cluster = np.load(f, allow_pickle=True).item() lime_weights = lime_weights_and_cluster['lime_weights'] cluster = 
lime_weights_and_cluster['cluster'] except: - print('When loading precomputed LIME result, skipping', f) + logging.info('When loading precomputed LIME result, skipping' + str(f)) continue - print('Loading precomputed LIME result,', f) - + logging.info('Loading precomputed LIME result,' + str(f)) pred_labels = lime_weights.keys() for y in pred_labels: normlime_weights = normlime_weights_all_labels.get(y, {}) @@ -207,23 +207,23 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples): # check normlime if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1: - print( - "\n" - "Warning: !!! \n" - f"There are at least {max(normlime_weights_all_labels.keys()) + 1} classes, " - f"but the NormLIME has results of only {len(normlime_weights_all_labels.keys())} classes. \n" - "It may have cause unstable results in the later computation" - " but can be improved by computing more test samples." + logging.info( + "\n" + \ + "Warning: !!! \n" + \ + "There are at least {} classes, ".format(max(normlime_weights_all_labels.keys()) + 1) + \ + "but the NormLIME has results of only {} classes. \n".format(len(normlime_weights_all_labels.keys())) + \ + "It may have cause unstable results in the later computation" + \ + " but can be improved by computing more test samples." 
+ \ "\n" ) n = 0 - f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy' + f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n) while os.path.exists( os.path.join(save_dir, f_out) ): n += 1 - f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy' + f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(lime_num_samples, len(a_list_lime_fnames), n) continue np.save( diff --git a/paddlex/utils/utils.py b/paddlex/utils/utils.py index fe14022152db3a8f9853c93afd20745adb30c1f0..875a027f187661ab3ed44266c1b90780a55d518a 100644 --- a/paddlex/utils/utils.py +++ b/paddlex/utils/utils.py @@ -269,11 +269,9 @@ def load_pretrain_weights(exe, vars_to_load.append(var) logging.debug("Weight {} will be load".format(var.name)) - fluid.io.load_vars( - executor=exe, - dirname=weights_dir, - main_program=main_prog, - vars=vars_to_load) + params_dict = fluid.io.load_program_state( + weights_dir, var_list=vars_to_load) + fluid.io.set_program_state(main_prog, params_dict) if len(vars_to_load) == 0: logging.warning( "There is no pretrain weights loaded, maybe you should check you pretrain model!" diff --git a/tools/codestyle/clang_format.hook b/tools/codestyle/clang_format.hook new file mode 100755 index 0000000000000000000000000000000000000000..1d928216867c0ba3897d71542fea44debf8d72a0 --- /dev/null +++ b/tools/codestyle/clang_format.hook @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +readonly VERSION="3.8" + +version=$(clang-format -version) + +if ! [[ $version == *"$VERSION"* ]]; then + echo "clang-format version check failed." 
+ echo "a version contains '$VERSION' is needed, but get '$version'" + echo "you can install the right version, and make an soft-link to '\$PATH' env" + exit -1 +fi + +clang-format $@ diff --git a/tools/codestyle/cpplint_pre_commit.hook b/tools/codestyle/cpplint_pre_commit.hook new file mode 100755 index 0000000000000000000000000000000000000000..630aeb8caaf88139fe8efae5c1f7e27f258d25c1 --- /dev/null +++ b/tools/codestyle/cpplint_pre_commit.hook @@ -0,0 +1,27 @@ +#!/bin/bash + +TOTAL_ERRORS=0 +if [[ ! $TRAVIS_BRANCH ]]; then + # install cpplint on local machine. + if [[ ! $(which cpplint) ]]; then + pip install cpplint + fi + # diff files on local machine. + files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}') +else + # diff files between PR and latest commit on Travis CI. + branch_ref=$(git rev-parse "$TRAVIS_BRANCH") + head_ref=$(git rev-parse HEAD) + files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}') +fi +# The trick to remove deleted files: https://stackoverflow.com/a/2413151 +for file in $files; do + if [[ $file =~ ^(patches/.*) ]]; then + continue; + else + cpplint --filter=-readability/fn_size $file; + TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); + fi +done + +exit $TOTAL_ERRORS