diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a750a0aef715e1c89d6dfe7de51f41b8918950f..377affbea0fc47f883f49bdafb540232dea06365 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,7 @@ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ exclude: (?!.*third_party)^.*$ +- repo: local hooks: - id: clang-format-with-version-check name: clang-format @@ -31,10 +32,11 @@ language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ +- repo: local hooks: - id: cpplint-cpp-source name: cpplint description: Check C++ code style using cpplint.py. - entry: bash cpplint_pre_commit.hook + entry: bash ./tools/codestyle/cpplint_pre_commit.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp index e5fc2800e2678aa26a15c9fa78d2de9b2e6e58ea..e42288fbccd434ef5953c606696af623323aa80d 100644 --- a/deploy/cpp/demo/detector.cpp +++ b/deploy/cpp/demo/detector.cpp @@ -66,7 +66,7 @@ int main(int argc, char** argv) { std::cout << "image file: " << image_path << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " @@ -89,7 +89,7 @@ int main(int argc, char** argv) { for (int i = 0; i < result.boxes.size(); ++i) { std::cout << ", predict label: " << result.boxes[i].category << ", label_id:" << result.boxes[i].category_id - << ", score: " << result.boxes[i].score << ", box:(" + << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " diff --git a/deploy/cpp/include/paddlex/results.h b/deploy/cpp/include/paddlex/results.h index de90c4a85130f42c0201f0d671fd3e2d53b0f37d..1643c9249e8e8e993017c7702d1d490352c2d9a8 100644 --- a/deploy/cpp/include/paddlex/results.h +++ b/deploy/cpp/include/paddlex/results.h @@ -63,9 +63,10 @@ class SegResult : public BaseResult { public: Mask label_map; Mask score_map; + std::string type = "seg"; void clear() { label_map.clear(); score_map.clear(); } }; -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h index df8cff59afacbe313f1eec5bc9835a736442583e..f8265db447f693d084c5a789504bc4b0ccc14d28 100644 --- a/deploy/cpp/include/paddlex/transforms.h +++ b/deploy/cpp/include/paddlex/transforms.h @@ -83,7 +83,7 @@ class ResizeByShort : public Transform { } else { max_size_ = -1; } - }; + } virtual bool Run(cv::Mat* im, ImageBlob* data); private: @@ -96,7 +96,7 @@ class ResizeByLong : public Transform { public: virtual void Init(const YAML::Node& item) { long_size_ = item["long_size"].as(); - }; + } virtual bool Run(cv::Mat* im, ImageBlob* data); private: @@ -167,9 +167,6 @@ class Padding : public Transform { height_ = item["target_size"].as>()[1]; } } - if (item["im_padding_value"].IsDefined()) { - value_ = item["im_padding_value"].as>(); - } } virtual bool Run(cv::Mat* im, ImageBlob* data); @@ -177,7 +174,6 @@ class Padding : public Transform { int coarsest_stride_ = -1; int width_ = 0; int height_ = 0; - std::vector value_; }; class Transforms { diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp index 
fb7c12c2e47b6bdc030ccef36bac1277e021436e..90a4a4452b9e5f3eba1c0b4c7ab88f5b91e03971 100644 --- a/deploy/cpp/src/paddlex.cpp +++ b/deploy/cpp/src/paddlex.cpp @@ -65,6 +65,15 @@ bool Model::load_config(const std::string& model_dir) { YAML::Node config = YAML::LoadFile(yaml_file); type = config["_Attributes"]["model_type"].as(); name = config["Model"].as(); + std::string version = config["version"].as(); + if (version[0] == '0') { + std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment " + << "cannot be done, please refer to " + << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md " + << "to transfer version." + << std::endl; + return false; + } bool to_rgb = true; if (config["TransformsMode"].IsDefined()) { std::string mode = config["TransformsMode"].as(); @@ -89,7 +98,7 @@ bool Model::load_config(const std::string& model_dir) { bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) { cv::Mat im = input_im.clone(); - if (!transforms_.Run(&im, &inputs_)) { + if (!transforms_.Run(&im, blob)) { return false; } return true; diff --git a/deploy/lite/export_lite.py b/deploy/lite/export_lite.py index b56aee9ee803b943473178b67f38c0f8d41a00da..0286d8733868dfbbaceadbfcf7d6728e367341df 100644 --- a/deploy/lite/export_lite.py +++ b/deploy/lite/export_lite.py @@ -19,30 +19,30 @@ import argparse def export_lite(): opt = lite.Opt() - model_file = os.path.join(FLAGS.model_path, '__model__') - params_file = os.path.join(FLAGS.model_path, '__params__') - opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_dir) + model_file = os.path.join(FLAGS.model_dir, '__model__') + params_file = os.path.join(FLAGS.model_dir, '__params__') + opt.run_optimize("", model_file, params_file, FLAGS.place, FLAGS.save_file) if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--model_path", + "--model_dir", type=str, default="", - help="model path.", + help="path of '__model__' and '__params__'.", required=True) parser.add_argument( "--place", type=str, default="arm", - help="preprocess config path.", + help="run place: 'arm|opencl|x86|npu|xpu|rknpu|apu'.", required=True) parser.add_argument( - "--save_dir", + "--save_file", type=str, default="paddlex.onnx", - help="Directory for storing the output visualization files.", + help="file name for storing the output files.", required=True) FLAGS = parser.parse_args() export_lite() diff --git a/docs/FAQ.md b/docs/FAQ.md index 8da14f32e428f868f637a395223855aa66371bbf..b120ebd10ed791c65c3f65e611c5b45da2a9211f 100755 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -60,3 +60,9 @@ ## 11. 每次训练新的模型,都需要重新下载预训练模型,怎样可以下载一次就搞定 > 1.可以按照9的方式来解决这个问题 > 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载 + +## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决? +> 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。 +> 2. 如果您的系统是Windows 7或者Windows Server 2012时,原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->"管理"打开服务器管理器,点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”,等安装完成尝试再次运行PaddleX。 +> 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中,如有请退出或者重启机器看是否解决 +> 4. 
请确认运行程序的用户是否有管理员权限,如非管理员权限用户请尝试使用管理员运行看是否成功 \ No newline at end of file diff --git a/docs/apis/datasets/detection.md b/docs/apis/datasets/detection.md index e660d7edfa9cfc41582902b92bcf0b0977766222..a32b6be5de6246ef6e28ebe376ded7e3faf82ff7 100755 --- a/docs/apis/datasets/detection.md +++ b/docs/apis/datasets/detection.md @@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, > 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md) -> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29) +> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29) > **参数** @@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** + +> > > * **image_dir** (str): 背景图片所在的目录路径。 + ## CocoDetection类 ``` @@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers= > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** + +> > > * **image_dir** (str): 背景图片所在的目录路径。 + ## EasyDataDet类 ``` @@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n > > * **num_workers** (int|str):数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。 > > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 -> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 + + +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** +> > > * **image_dir** (str): 背景图片所在的目录路径。 diff --git a/docs/apis/deploy.md b/docs/apis/deploy.md index ad97dc23957104cebc7e487d38554cb06ced57f2..4ddc0d90a5a2ac17ab9c9154bddf2421489fd8a7 100755 --- a/docs/apis/deploy.md +++ b/docs/apis/deploy.md @@ -1,6 +1,6 @@ -# Predictor部署-paddlex.deploy +# 预测部署-paddlex.deploy -使用AnalysisPredictor进行预测部署。 +使用Paddle Inference进行高性能的Python预测部署。更多关于Paddle Inference信息请参考[Paddle Inference文档](https://paddle-inference.readthedocs.io/en/latest/#) ## Predictor类 @@ -22,6 +22,7 @@ paddlex.deploy.Predictor(model_dir, use_gpu=False, gpu_id=0, use_mkl=False, use_ > > > > ``` > > import paddlex +> > > > model = paddlex.deploy.Predictor(model_dir, use_gpu=True) > > result = model.predict(image_file) > > ``` diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md index 
4fa083df17d5d87c1b9755e9c224bfd236ebc6b6..82b459d8281b1e9bc9d1f7abdd48fddb16473c21 100755 --- a/docs/apis/models/classification.md +++ b/docs/apis/models/classification.md @@ -15,7 +15,7 @@ paddlex.cls.ResNet50(num_classes=1000) ### train 训练接口 ```python -train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None) +train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, warmup_steps=0, warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None) ``` > > **参数** @@ -30,12 +30,14 @@ train(self, num_epochs, train_dataset, train_batch_size=64, eval_dataset=None, s > > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.025。 +> > - **warmup_steps** (int): 默认优化器的warmup步数,学习率将在设定的步数内,从warmup_start_lr线性增长至设定的learning_rate,默认为0。 +> > - **warmup_start_lr**(float): 默认优化器的warmup起始学习率,默认为0.0。 > > - **lr_decay_epochs** (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 > > - **lr_decay_gamma** (float): 默认优化器的学习率衰减率。默认为0.1。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -186,3 +188,7 @@ paddlex.cls.DenseNet161(num_classes=1000) paddlex.cls.DenseNet201(num_classes=1000) ``` +### HRNet_W18 +```python +paddlex.cls.HRNet_W18(num_classes=1000) +``` diff --git a/docs/apis/models/detection.md b/docs/apis/models/detection.md index 2039be6db803d0f0c4335346e7b6127565c684af..f76e5598636f6c8ac94b90acca7fe1c846708077 100755 --- a/docs/apis/models/detection.md +++ b/docs/apis/models/detection.md @@ -9,7 +9,7 @@ paddlex.det.YOLOv3(num_classes=80, backbone='MobileNetV1', anchors=None, anchor_ > 构建YOLOv3检测器。**注意在YOLOv3,num_classes不需要包含背景类,如目标包括human、dog两种,则num_classes设为2即可,这里与FasterRCNN/MaskRCNN有差别** > **参数** -> +> > > - **num_classes** (int): 类别数。默认为80。 > > - **backbone** (str): YOLOv3的backbone网络,取值范围为['DarkNet53', 'ResNet34', 'MobileNetV1', 'MobileNetV3_large']。默认为'MobileNetV1'。 > > - **anchors** (list|tuple): anchor框的宽度和高度,为None时表示使用默认值 @@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 
若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。 @@ -53,7 +53,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认值为False。 > > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在PascalVOC数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -107,7 +107,7 @@ paddlex.det.FasterRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspec > **参数** > > - **num_classes** (int): 包含了背景类的类别数。默认为81。 -> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 +> > - **backbone** (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18的COCO预训练模型);为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md index f08645478cdafa845431ed52b68387f672886d17..72d008b2252a0df73648941d8dbee9d6f8a8764a 100755 --- a/docs/apis/models/instance_segmentation.md +++ b/docs/apis/models/instance_segmentation.md @@ -12,7 +12,7 @@ paddlex.det.MaskRCNN(num_classes=81, backbone='ResNet50', with_fpn=True, aspect_ > **参数** > > - **num_classes** (int): 包含了背景类的类别数。默认为81。 -> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 +> > - **backbone** (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 > > - **with_fpn** (bool): 是否使用FPN结构。默认为True。 > > - **aspect_ratios** (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 > > - **anchor_sizes** (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -34,7 +34,7 @@ train(self, num_epochs, 
train_dataset, train_batch_size=1, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18和HRNet_W18的COCO预训练模型);若为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 @@ -82,4 +82,4 @@ predict(self, img_file, transforms=None) > > **返回值** > -> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度,其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。 +> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度。其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景。 diff --git a/docs/apis/models/semantic_segmentation.md b/docs/apis/models/semantic_segmentation.md index 12b54d8c6114466771e25bdd88d21466018ed5aa..26a695a9564f6929ff586eaa179242b99b5466de 100755 --- a/docs/apis/models/semantic_segmentation.md +++ b/docs/apis/models/semantic_segmentation.md @@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride > **参数** > > - **num_classes** (int): 类别数。 -> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],'MobileNetV2_x1.0'。 +> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],默认值为'MobileNetV2_x1.0'。 > > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。 > > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 > > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 @@ -40,14 +40,14 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认'output' -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型);若为字符串'CITYSCAPES',则自动下载在CITYSCAPES数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 -> > - **sensitivities_file** (str): 
若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 -> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 > > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 @@ -124,12 +124,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认'output' -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'COCO',则自动下载在COCO图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'COCO'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 -> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 @@ -173,3 +173,173 @@ predict(self, im_file, transforms=None): > **返回值** > > > > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 + + +## HRNet类 + +```python +paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255) +``` + +> 构建HRNet分割器。 + +> **参数** + +> > - **num_classes** (int): 类别数。 +> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 +> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 +> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 +> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。 +> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。 + +### train 训练接口 + +```python +train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None): +``` + +> HRNet模型训练接口。 + +> **参数** +> > +> > - **num_epochs** (int): 训练迭代轮数。 +> > - **train_dataset** (paddlex.datasets): 训练数据读取器。 +> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 +> > - **eval_dataset** (paddlex.datasets): 
评估数据读取器。 +> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 +> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 +> > - **save_dir** (str): 模型保存路径。默认'output' +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。 +> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 +> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 +> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 +> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 +> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 +> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + +#### evaluate 评估接口 + +``` +evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False): +``` + +> HRNet模型评估接口。 + +> **参数** +> > +> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。 +> > - **batch_size** (int): 评估时的batch大小。默认1。 +> > - **epoch_id** (int): 当前评估模型所在的训练轮数。 +> > - **return_details** (bool): 是否返回详细信息。默认False。 + +> **返回值** +> > +> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 +> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 +> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), +> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 + +#### predict 预测接口 + +``` +predict(self, im_file, transforms=None): +``` + +> HRNet模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。 + +> **参数** +> > +> > - **img_file** (str): 预测图像路径。 +> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。 + +> **返回值** +> > +> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 + + +## FastSCNN类 + +```python +paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0]) +``` + +> 构建FastSCNN分割器。 + +> **参数** + +> > - **num_classes** (int): 类别数。 +> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 +> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 +> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。 +> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。 +> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。 + +### train 训练接口 + +```python +train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, 
save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None): +``` + +> FastSCNN模型训练接口。 + +> **参数** +> > +> > - **num_epochs** (int): 训练迭代轮数。 +> > - **train_dataset** (paddlex.datasets): 训练数据读取器。 +> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 +> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。 +> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 +> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 +> > - **save_dir** (str): 模型保存路径。默认'output' +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。 +> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 +> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 +> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 +> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 +> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 +> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 +> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + +#### evaluate 评估接口 + +``` +evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False): +``` + +> FastSCNN模型评估接口。 + +> **参数** +> > +> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。 +> > - **batch_size** (int): 评估时的batch大小。默认1。 +> > - **epoch_id** (int): 当前评估模型所在的训练轮数。 +> > - **return_details** (bool): 是否返回详细信息。默认False。 + +> **返回值** +> > +> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 +> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 +> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), +> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 + +#### predict 预测接口 + +``` +predict(self, im_file, transforms=None): +``` + +> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。 + +> **参数** +> > +> > - **img_file** (str): 预测图像路径。 +> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。 + +> **返回值** +> > +> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 diff --git a/docs/apis/transforms/augment.md b/docs/apis/transforms/augment.md index bc37d4853034f42361edd23e2719c119cda5a9bb..f8c66b45d2d853fde57c520b079f9974e5fa4d76 100644 --- a/docs/apis/transforms/augment.md +++ b/docs/apis/transforms/augment.md @@ -9,8 +9,8 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了 | 任务类型 | 增强方法 | | :------- | :------------| | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、
[RandomRotate](cls_transforms.html#randomratate)、 [RandomDistort](cls_transforms.html#randomdistort) | -|目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、RandomExpand](det_transforms.html#randomexpand) | -|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | +|目标检测
实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、
[MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) | +|语义分割 | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、
[RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、
[RandomRotate](seg_transforms.html#randomrotate)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) | ## imgaug增强库的支持 diff --git a/docs/apis/transforms/cls_transforms.md b/docs/apis/transforms/cls_transforms.md index ccffc30292c7b5dea81ba6fe62a42a1810203ca9..7d124b9bed4445eb7a216587cde8a35532f54a48 100755 --- a/docs/apis/transforms/cls_transforms.md +++ b/docs/apis/transforms/cls_transforms.md @@ -15,7 +15,7 @@ paddlex.cls.transforms.Compose(transforms) ## RandomCrop类 ```python -paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3. / 4, upper_ratio=4. / 3) +paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3) ``` 对图像进行随机剪裁,模型训练时的数据增强操作。 @@ -26,7 +26,7 @@ paddlex.cls.transforms.RandomCrop(crop_size=224, lower_scale=0.88, lower_ratio=3 ### 参数 * **crop_size** (int): 随机裁剪后重新调整的目标边长。默认为224。 -* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。 +* **lower_scale** (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。 * **lower_ratio** (float): 宽变换比例的最小限制。默认为3. / 4。 * **upper_ratio** (float): 宽变换比例的最小限制。默认为4. / 3。 @@ -122,3 +122,64 @@ paddlex.cls.transforms.RandomDistort(brightness_range=0.9, brightness_prob=0.5, * **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。 * **hue_range** (int): 色调因子的范围。默认为18。 * **hue_prob** (float): 随机调整色调的概率。默认为0.5。 + +## ComposedClsTransforms类 +```python +paddlex.cls.transforms.ComposedClsTransforms(mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +``` +分类模型中已经组合好的数据处理流程,开发者可以直接使用ComposedClsTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomCrop](#RandomCrop)和[RandomHorizontalFlip](#RandomHorizontalFlip)两种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 +ComposedClsTransforms共包括以下几个步骤: +> 训练阶段: +> > 1. 随机从图像中crop一块子图,并resize成crop_size大小 +> > 2. 将1的输出按0.5的概率随机进行水平翻转 +> > 3. 将图像进行归一化 +> 验证/预测阶段: +> > 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 +> > 2. 从图像中心crop出一个大小为crop_size的图像 +> > 3. 
将图像进行归一化 + +### 参数 +* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test' +* **crop_size** (int|list): 输入到模型里的图像大小,默认为[224, 224](与原图大小无关,根据上述几个步骤,会将原图处理成该图大小输入给模型训练) +* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。 +* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。 + +### 添加数据增强方式 +```python +ComposedClsTransforms.add_augmenters(augmenters) +``` +> **参数** +> * **augmenters**(list): 数据增强方式列表 + +#### 使用示例 +``` +import paddlex as pdx +from paddlex.cls import transforms +train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[320, 320]) +eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[320, 320]) + +# 添加数据增强 +import imgaug.augmenters as iaa +train_transforms.add_augmenters([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)) +]) +``` +上面代码等价于 +``` +import paddlex as pdx +from paddlex.cls import transforms +train_transforms = transforms.Composed([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)), + # 上面两个为通过add_augmenters额外添加的数据增强方式 + transforms.RandomCrop(crop_size=320), + transforms.RandomHorizontalFlip(prob=0.5), + transforms.Normalize() +]) +eval_transforms = transforms.Composed([ + transforms.ResizeByShort(short_size=int(320*1.14)), + transforms.CenterCrop(crop_size=320), + transforms.Normalize() +]) +``` diff --git a/docs/apis/transforms/det_transforms.md b/docs/apis/transforms/det_transforms.md index 0ee6e57ee778769c0e363eaee9050b36d5f6eb5a..3b182a1e4eeb7fdbe2d40c7530989d54919d8ec2 100755 --- a/docs/apis/transforms/det_transforms.md +++ b/docs/apis/transforms/det_transforms.md @@ -167,3 +167,133 @@ paddlex.det.transforms.RandomCrop(aspect_ratio=[.5, 2.], thresholds=[.0, .1, .3, * **num_attempts** (int): 在放弃寻找有效裁剪区域前尝试的次数。默认值为50。 * **allow_no_crop** (bool): 是否允许未进行裁剪。默认值为True。 * **cover_all_box** (bool): 是否要求所有的真实标注框都必须在裁剪区域内。默认值为False。 + +## ComposedRCNNTransforms类 +```python +paddlex.det.transforms.ComposedRCNNTransforms(mode, min_max_size=[224, 224], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +``` +目标检测FasterRCNN和实例分割MaskRCNN模型中已经组合好的数据处理流程,开发者可以直接使用ComposedRCNNTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 +ComposedRCNNTransforms共包括以下几个步骤: +> 训练阶段: +> > 1. 随机以0.5的概率将图像水平翻转 +> > 2. 将图像进行归一化 +> > 3. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩入 +> > 4. 使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数 +> 验证/预测阶段: +> > 1. 将图像进行归一化 +> > 2. 图像采用[ResizeByShort](#ResizeByShort)方式,根据min_max_size参数,进行缩入 +> > 3. 
使用[Padding](#Padding)将图像的长和宽分别Padding成32的倍数 + +### 参数 +* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test' +* **min_max_size** (list): 输入模型中图像的最短边长度和最长边长度,参考[ResizeByShort](#ResizeByShort)(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练),默认[800, 1333] +* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。 +* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。 + +### 添加数据增强方式 +```python +ComposedRCNNTransforms.add_augmenters(augmenters) +``` +> **参数** +> * **augmenters**(list): 数据增强方式列表 + +#### 使用示例 +``` +import paddlex as pdx +from paddlex.det import transforms +train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) +eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) + +# 添加数据增强 +import imgaug.augmenters as iaa +train_transforms.add_augmenters([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)) +]) +``` +上面代码等价于 +``` +import paddlex as pdx +from paddlex.det import transforms +train_transforms = transforms.Composed([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)), + # 上面两个为通过add_augmenters额外添加的数据增强方式 + transforms.RandomHorizontalFlip(prob=0.5), + transforms.Normalize(), + transforms.ResizeByShort(short_size=800, max_size=1333), + transforms.Padding(coarsest_stride=32) +]) +eval_transforms = transforms.Composed([ + transforms.Normalize(), + transforms.ResizeByShort(short_size=800, max_size=1333), + transforms.Padding(coarsest_stride=32) +]) +``` + + +## ComposedYOLOv3Transforms类 +```python +paddlex.det.transforms.ComposedYOLOv3Transforms(mode, shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +``` +目标检测YOLOv3模型中已经组合好的数据处理流程,开发者可以直接使用ComposedYOLOv3Transforms,简化手动组合transforms的过程, 该类中已经包含了[MixupImage](#MixupImage)、[RandomDistort](#RandomDistort)、[RandomExpand](#RandomExpand)、[RandomCrop](#RandomCrop)、[RandomHorizontalFlip](#RandomHorizontalFlip)5种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 +ComposedYOLOv3Transforms共包括以下几个步骤: +> 训练阶段: +> > 1. 在前mixup_epoch轮迭代中,使用MixupImage策略 +> > 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 +> > 3. 随机扩充图像 +> > 4. 随机裁剪图像 +> > 5. 将4步骤的输出图像Resize成shape参数的大小 +> > 6. 随机0.5的概率水平翻转图像 +> > 7. 图像归一化 +> 验证/预测阶段: +> > 1. 将图像Resize成shape参数大小 +> > 2. 
图像归一化 + +### 参数 +* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test' +* **shape** (list): 输入模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[608, 608] +* **mixup_epoch**(int): 模型训练过程中,在前mixup_epoch轮迭代中,使用mixup策略,如果为-1,则不使用mixup策略, 默认250。 +* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。 +* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。 + +### 添加数据增强方式 +```python +ComposedYOLOv3Transforms.add_augmenters(augmenters) +``` +> **参数** +> * **augmenters**(list): 数据增强方式列表 + +#### 使用示例 +``` +import paddlex as pdx +from paddlex.det import transforms +train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[480, 480]) +eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[480, 480]) + +# 添加数据增强 +import imgaug.augmenters as iaa +train_transforms.add_augmenters([ + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)) +]) +``` +上面代码等价于 +``` +import paddlex as pdx +from paddlex.det import transforms +train_transforms = transforms.Composed([ + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)), + # 上面为通过add_augmenters额外添加的数据增强方式 + transforms.MixupImage(mixup_epoch=250), + transforms.RandomDistort(), + transforms.RandomExpand(), + transforms.RandomCrop(), + transforms.Resize(target_size=480, interp='RANDOM'), + transforms.RandomHorizontalFlip(prob=0.5), + transforms.Normalize() +]) +eval_transforms = transforms.Composed([ + transforms.Resize(target_size=480, interp='CUBIC'), + transforms.Normalize() +]) +``` diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md index 1708290680e767b3a06615f0b789279b716433ea..1fb2b561e4818edad72fd97f43029de079b355b3 100755 --- a/docs/apis/transforms/seg_transforms.md +++ b/docs/apis/transforms/seg_transforms.md @@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1) * **prob** (float): 图像模糊概率。默认为0.1。 -## RandomRotation类 +## RandomRotate类 ```python paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255) ``` @@ -166,3 +166,63 @@ paddlex.seg.transforms.RandomDistort(brightness_range=0.5, brightness_prob=0.5, * **saturation_prob** (float): 随机调整饱和度的概率。默认为0.5。 * **hue_range** (int): 色调因子的范围。默认为18。 * **hue_prob** (float): 随机调整色调的概率。默认为0.5。 + +## ComposedSegTransforms类 +```python +paddlex.det.transforms.ComposedSegTransforms(mode, train_crop_shape=[769, 769], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +``` +语义分割DeepLab和UNet模型中已经组合好的数据处理流程,开发者可以直接使用ComposedSegTransforms,简化手动组合transforms的过程, 该类中已经包含了[RandomHorizontalFlip](#RandomHorizontalFlip)、[ResizeStepScaling](#ResizeStepScaling)、[RandomPaddingCrop](#RandomPaddingCrop)3种数据增强方式,你仍可以通过[add_augmenters函数接口](#add_augmenters)添加新的数据增强方式。 +ComposedSegTransforms共包括以下几个步骤: + > 训练阶段: +> > 1. 随机对图像以0.5的概率水平翻转 +> > 2. 按不同的比例随机Resize原图 +> > 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小 +> > 4. 图像归一化 + > 预测阶段: +> > 1. 
图像归一化 + + +### 参数 +* **mode** (str): Transforms所处的阶段,包括`train', 'eval'或'test' +* **train_crop_size** (list): 训练过程中随机Crop和Resize后(验证或预测过程中不需配置该参数,自动使用原图大小),输入到模型中图像的大小(与原图大小无关,根据上述几个步骤,会将原图处理成相应大小输入给模型训练), 默认[769, 769] +* **mean** (list): 图像均值, 默认为[0.485, 0.456, 0.406]。 +* **std** (list): 图像方差,默认为[0.229, 0.224, 0.225]。 + +### 添加数据增强方式 +```python +ComposedSegTransforms.add_augmenters(augmenters) +``` +> **参数** +> * **augmenters**(list): 数据增强方式列表 + +#### 使用示例 +``` +import paddlex as pdx +from paddlex.seg import transforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512]) +eval_transforms = transforms.ComposedYOLOTransforms(mode='eval') + +# 添加数据增强 +import imgaug.augmenters as iaa +train_transforms.add_augmenters([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)) +]) +``` +上面代码等价于 +``` +import paddlex as pdx +from paddlex.det import transforms +train_transforms = transforms.Composed([ + transforms.RandomDistort(), + iaa.blur.GaussianBlur(sigma=(0.0, 3.0)), + # 上面2行为通过add_augmenters额外添加的数据增强方式 + transforms.RandomHorizontalFlip(prob=0.5), + transforms.ResizeStepScaling(), + transforms.PaddingCrop(crop_size=[512, 512]), + transforms.Normalize() +]) +eval_transforms = transforms.Composed([ + transforms.Normalize() +]) +``` diff --git a/docs/appendix/model_zoo.md b/docs/appendix/model_zoo.md index f48ea51361f85c6fdeb22c6c03e8d6f982fbd439..200847bc95aec5872879c3fbbe49b6f2ed0c741e 100644 --- a/docs/appendix/model_zoo.md +++ b/docs/appendix/model_zoo.md @@ -6,47 +6,56 @@ | 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) | | :----| :------- | :----------- | :--------- | :--------- | -| ResNet18| 46.9MB | 1.499 | 71.0 | 89.9 | -| ResNet34| 87.5MB | 2.272 | 74.6 | 92.1 | -| ResNet50| 102.7MB | 2.939 | 76.5 | 93.0 | -| ResNet101 |179.1MB | 5.314 | 77.6 | 93.6 | -| ResNet50_vd |102.8MB | 3.165 | 79.1 | 94.4 | -| ResNet101_vd| 179.2MB | 5.252 | 80.2 | 95.0 | -| ResNet50_vd_ssld |102.8MB | 3.165 | 82.4 | 96.1 | -| ResNet101_vd_ssld| 179.2MB | 5.252 | 83.7 | 96.7 | -| DarkNet53|166.9MB | 3.139 | 78.0 | 94.1 | -| MobileNetV1 | 16.0MB | 32.523 | 71.0 | 89.7 | -| MobileNetV2 | 14.0MB | 23.318 | 72.2 | 90.7 | -| MobileNetV3_large| 21.0MB | 19.308 | 75.3 | 93.2 | -| MobileNetV3_small | 12.0MB | 6.546 | 68.2 | 88.1 | -| MobileNetV3_large_ssld| 21.0MB | 19.308 | 79.0 | 94.5 | -| MobileNetV3_small_ssld | 12.0MB | 6.546 | 71.3 | 90.1 | -| Xception41 |92.4MB | 4.408 | 79.6 | 94.4 | -| Xception65 | 144.6MB | 6.464 | 80.3 | 94.5 | -| DenseNet121 | 32.8MB | 4.371 | 75.7 | 92.6 | -| DenseNet161|116.3MB | 8.863 | 78.6 | 94.1 | -| DenseNet201| 84.6MB | 8.173 | 77.6 | 93.7 | -| ShuffleNetV2 | 9.0MB | 10.941 | 68.8 | 88.5 | +| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 | +| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 | +| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 | +| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 | +| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 | +| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 | +| 
[ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 | +| [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 | +| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 | +| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 | +| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 | +| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 | +| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 | +| [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 | +| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 | +| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 | +| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 | +| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 | +| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 | +| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 | +| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 | +| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 | ## 目标检测模型 -> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。 +> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到),表中符号`-`表示相关指标暂未测试。 | 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) | |:-------|:-----------|:-------------|:----------| -|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 | -|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 | -|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 | -|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 | -|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 | -|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 | -|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 | -|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 | -|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 | -|YOLOv3-MobileNetv3|94.6MB | - | 31.6 | -| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 | +|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 | +|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 | +|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 | 
+|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 | +|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 | +|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 156.097 | 38.7 | +|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 119.788 | 40.5 | +|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 | +|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 | +|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 | +|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 | +| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 | ## 实例分割模型 > 表中模型相关指标均为在MSCOCO数据集上测试得到。 +| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) | +|:-------|:-----------|:-------------|:----------| +|DeepLabv3+-MobileNetV2_x1.0|-| - | - | +|DeepLabv3+-Xception41|-| - | - | +|DeepLabv3+-Xception65|-| - | - | +|UNet|-| - | - | +|HRNet_w18|-| - | - | diff --git a/docs/appendix/parameters.md b/docs/appendix/parameters.md index 732535dd08f2cc16e16e14fe2a45cd655706dc4c..716cdac92aa6504f1543cb91997f2f1fd89a3e13 100644 --- a/docs/appendix/parameters.md +++ b/docs/appendix/parameters.md @@ -23,3 +23,35 @@ Batch Size指模型在训练过程中,一次性处理的样本数量, 如若 - [实例分割MaskRCNN-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#train) - [语义分割DeepLabv3p-train](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#train) - [语义分割UNet](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#id2) + +## 关于lr_decay_epoch, warmup_steps等参数的说明 + +在PaddleX或其它深度学习模型的训练过程中,经常见到lr_decay_epoch, warmup_steps, warmup_start_lr等参数设置,下面介绍一些这些参数的作用。 + +首先这些参数都是用于控制模型训练过程中学习率的变化方式,例如我们在训练时将learning_rate设为0.1, 通常情况,在模型的训练过程中,学习率一直以0.1不变训练下去, 但为了调出更好的模型效果,我们往往不希望学习率一直保持不变。 + +### warmup_steps和warmup_start_lr + +我们在训练模型时,一般都会使用预训练模型,例如检测模型在训练时使用backbone在ImageNet数据集上的预训练权重。但由于在自行训练时,自己的数据与ImageNet数据集存在较大的差异,可能会一开始由于梯度过大使得训练出现问题,因此可以在刚开始训练时,让学习率以一个较小的值,慢慢增长到设定的学习率。因此`warmup_steps`和`warmup_start_lr`就是这个作用,模型开始训练时,学习率会从`warmup_start_lr`开始,在`warmup_steps`内线性增长到设定的学习率。 + +### lr_decay_epochs和lr_decay_gamma + +`lr_decay_epochs`用于让学习率在模型训练后期逐步衰减,它一般是一个list,如[6, 8, 10],表示学习率在第6个epoch时衰减一次,第8个epoch时再衰减一次,第10个epoch时再衰减一次。每次学习率衰减为之前的学习率*lr_decay_gamma + +### Notice + +在PaddleX中,限制warmup需要在第一个学习率decay衰减前结束,因此要满足下面的公式 +``` +warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch +``` +其中公式中`num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size`。 + +> 因此如若在训练时PaddleX提示`warmup_steps should be less than xxx`时,即可根据上述公式来调整你的`lr_decay_epochs`或者是`warmup_steps`使得两个参数满足上面的条件 + +> - 图像分类模型 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#train) +> - FasterRCNN [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn) +> - YOLOv3 [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3) +> - MaskRCNN 
[train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn) +> - DeepLab [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p) +> - UNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet) +> - HRNet [train接口文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet) diff --git "a/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" "b/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" deleted file mode 100644 index 0eea21e4388200485959746b4135f58d45711883..0000000000000000000000000000000000000000 Binary files "a/docs/images/._\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" and /dev/null differ diff --git "a/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" "b/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" deleted file mode 100644 index 17e25a531e68c7655e46ad52d6c607e62bdcc6d6..0000000000000000000000000000000000000000 Binary files "a/docs/images/\346\226\207\344\273\266(p37) BDSZYF000132754-docs jiangjiajun$ pwd :Users:jiangjiajun:Downloads:PaddleX-develop:docs:vdl1.png" and /dev/null differ diff --git a/docs/index.rst b/docs/index.rst index c3276f301bcd54846ba3639441572a26a5dd0f6a..1d8d9c0c124dd0015e7b90634fcb1b1551db87f5 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,13 +26,14 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习 cv_solutions.md apis/index.rst paddlex_gui/index.rst + tuning_strategy/index.rst update.md FAQ.md appendix/index.rst * PaddleX版本: v1.0.0 * 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex -* 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop +* 项目GitHub: https://github.com/PaddlePaddle/PaddleX * 官方QQ用户群: 1045148026 * GitHub Issue反馈: http://www.github.com/PaddlePaddle/PaddleX/issues diff --git a/docs/paddlex_gui/download.md b/docs/paddlex_gui/download.md index 102326977c57ba65c614abca52f65d8a63c80259..77bb9962b37498ec3279a51cdc1faa34da1f498b 100644 --- a/docs/paddlex_gui/download.md +++ b/docs/paddlex_gui/download.md @@ -1 +1,28 @@ -# PaddleX GUI下载安装 +## PaddleX GUI安装 + + PaddleX GUI是提升项目开发效率的核心模块,开发者可快速完成深度学习模型全流程开发。我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddle/paddleX)下载试用PaddleX GUI可视化前端,并获得您宝贵的意见或开源项目贡献。 + + + +#### 安装推荐环境 + +* **操作系统**: + * Windows7/8/10(推荐Windows 10); + * Mac OS 10.13+; + * Ubuntu 18.04+; + +***注:处理器需为x86_64架构,支持MKL。*** + +* **训练硬件**: + * **GPU**(仅Windows及Linux系统): + 推荐使用支持CUDA的NVIDIA显卡,例如:GTX 1070+以上性能的显卡; + Windows系统X86_64驱动版本>=411.31; + Linux系统X86_64驱动版本>=410.48; + 显存8G以上; + * **CPU**: + PaddleX当前支持您用本地CPU进行训练,但推荐使用GPU以获得更好的开发体验。 + * **内存**:建议8G以上 + * **硬盘空间**:建议SSD剩余空间1T以上(非必须) + +***注:PaddleX在Windows及Mac OS系统只支持单卡模型。Windows系统暂不支持NCCL。*** + diff --git a/docs/paddlex_gui/how_to_use.md b/docs/paddlex_gui/how_to_use.md index 1e9a3eeb69d276596636e814f48fad4fe9f3e9d0..32740c114242ccc2c6b7ecacc3088ba163fe7a3c 100644 --- a/docs/paddlex_gui/how_to_use.md +++ b/docs/paddlex_gui/how_to_use.md @@ -1 +1,148 @@ -# PaddleX GUI如何训练模型 +# PaddleX GUI使用文档 + 
+飞桨全流程开发工具,集飞桨核心框架、模型库、工具及组件等深度学习开发全流程所需能力于一身,易用易集成,是开发者快速入门深度学习、提升深度学习项目开发效率的最佳辅助工具。 + +PaddleX GUI是一个应用PaddleX实现的一个图形化开发客户端产品,它使得开发者通过键入式输入即可完成深度学习模型全流程开发,可大幅度提升项目开发效率。飞桨团队期待各位开发者基于PaddleX,实现出各种符合自己产业实际需求的产品。 + +我们诚挚地邀请您前往 [官网](https://www.paddlepaddle.org.cn/paddlex)下载试用PaddleX GUI,并获得您宝贵的意见或开源项目贡献。 + + + +## 目录 + +* **产品特性** +* **PaddleX GUI可视化前端** +* **FAQ** + + + +## 产品特性 + +\- **全流程打通** + +将深度学习开发全流程打通,并提供可视化开发界面, 省去了对各环节API的熟悉过程及重复的代码开发,极大地提升了开发效率。 + +\- **易用易集成** + +提供功能最全、最灵活的Python API开发模式,完全开源开放,易于集成和二次开发。键入式输入的图形化开发界面,使得非专业算法人员也可快速进行业务POC。 + +\- **融合产业实践经验** + +融合飞桨产业落地经验,精选高质量的视觉模型方案,开放实际的案例教学,手把手带您实现产业需求落地。 + +\- **教程与服务** + +从数据集准备到上线部署,为您提供业务开发全流程的文档说明及技术服务。开发者可以通过QQ群、微信群、GitHub社区等多种形式与飞桨团队及同业合作伙伴交流沟通。 + + + +## PaddleX GUI 可视化前端 + +**第一步:准备数据** + +在开始模型训练前,您需要根据不同的任务类型,将数据标注为相应的格式。目前PaddleX支持【图像分类】、【目标检测】、【语义分割】、【实例分割】四种任务类型。不同类型任务的数据处理方式可查看[数据标注方式](https://paddlex.readthedocs.io/zh_CN/latest/appendix/datasets.html)。 + + + +**第二步:导入我的数据集** + +①数据标注完成后,您需要根据不同的任务,将数据和标注文件,按照客户端提示更名并保存到正确的文件中。 + +②在客户端新建数据集,选择与数据集匹配的任务类型,并选择数据集对应的路径,将数据集导入。 + +![](images/datasets1.jpg) + +③选定导入数据集后,客户端会自动校验数据及标注文件是否合规,校验成功后,您可根据实际需求,将数据集按比例划分为训练集、验证集、测试集。 + +④您可在「数据分析」模块按规则预览您标注的数据集,双击单张图片可放大查看。 + +![](images/dataset2.jpg) + +**第三步:创建项目** + +① 在完成数据导入后,您可以点击「新建项目」创建一个项目。 + +② 您可根据实际任务需求选择项目的任务类型,需要注意项目所采用的数据集也带有任务类型属性,两者需要进行匹配。 + +![](images/project3.jpg) + + + +**第四步:项目开发** + +① **数据选择**:项目创建完成后,您需要选择已载入客户端并校验后的数据集,并点击下一步,进入参数配置页面。 + +![](images/project1.jpg) + +② **参数配置**:主要分为**模型参数**、**训练参数**、**优化策略**三部分。您可根据实际需求选择模型结构、骨架网络及对应的训练参数、优化策略,使得任务效果最佳。 + +![](images/project2.jpg) + +参数配置完成后,点击启动训练,模型开始训练并进行效果评估。 + +③ **训练可视化**:在训练过程中,您可通过VisualDL查看模型训练过程参数变化、日志详情,及当前最优的训练集和验证集训练指标。模型在训练过程中通过点击"中止训练"随时中止训练过程。 + +![](images/visualization1.jpg) + +模型训练结束后,可选择进入『模型剪裁分析』或者直接进入『模型评估』。 + +![](images/visualization2.jpg) + +④ **模型裁剪**:如果开发者希望减少模型的体积、计算量,提升模型在设备上的预测性能,可以采用PaddleX提供的模型裁剪策略。裁剪过程将对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪,再进行精调训练获得最终裁剪后的模型。 + +![](images/visualization3.jpg) + +⑤ **模型评估**:在模型评估页面,您可查看训练后的模型效果。评估方法包括混淆矩阵、精度、召回率等。 + +![](images/visualization4.jpg) + +您还可以选择『数据集切分』时留出的『测试数据集』或从本地文件夹中导入一张/多张图片,将训练后的模型进行测试。根据测试结果,您可决定是否将训练完成的模型保存为预训练模型并进入模型发布页面,或返回先前步骤调整参数配置重新进行训练。 + +![](images/visualization5.jpg) + + + +**第五步:模型发布** + +当模型效果满意后,您可根据实际的生产环境需求,选择将模型发布为需要的版本。 + +![](images/publish.jpg) + + + +## FAQ + +1. **为什么训练速度这么慢?** + + PaddleX完全采用您本地的硬件进行计算,深度学习任务确实对算力要求较高,为了使您能快速体验应用PaddleX进行开发,我们适配了CPU硬件,但强烈建议您使用GPU以提升训练速度和开发体验。 + + + +2. **我可以在服务器或云平台上部署PaddleX么?** + + PaddleX GUI是一个适配本地单机安装的客户端,无法在服务器上直接进行部署,您可以直接使用PaddleX API,或采用飞桨核心框架进行服务器上的部署。如果您希望使用公有算力,强烈建议您尝试飞桨产品系列中的 [EasyDL](https://ai.baidu.com/easydl/) 或 [AI Studio](https://aistudio.baidu.com/aistudio/index)进行开发。 + + + +3. **PaddleX支持EasyData标注的数据吗?** + + 支持,PaddleX可顺畅读取EasyData标注的数据。但当前版本的PaddleX GUI暂时无法支持直接导入EasyData数据格式,您可以参照文档,将[数据集进行转换](https://paddlex.readthedocs.io/zh_CN/latest/appendix/how_to_convert_dataset.html)再导入PaddleX GUI进行后续开发。 + 同时,我们也在紧密开发PaddleX GUI可直接导入EasyData数据格式的功能。 + + + +4. **为什么模型裁剪分析耗时这么长?** + + 模型裁剪分析过程是对模型各卷积层的敏感度信息进行分析,根据各参数对模型效果的影响进行不同比例的裁剪。此过程需要重复多次直至FLOPS满足要求,最后再进行精调训练获得最终裁剪后的模型,因此耗时较长。有关模型裁剪的原理,可参见文档[剪裁原理介绍](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86) + + + +5. 
**如何调用后端代码?** + + PaddleX 团队为您整理了相关的API接口文档,方便您学习和使用。具体请参见[PaddleX API说明文档](https://paddlex.readthedocs.io/zh_CN/latest/apis/index.html) + + + +**如果您有更多问题或建议,欢迎以issue的形式,或加入PaddleX官方QQ群(1045148026)直接反馈您的问题和需求** + +![](images/QR.jpg) diff --git a/docs/paddlex_gui/images/QR.jpg b/docs/paddlex_gui/images/QR.jpg new file mode 100644 index 0000000000000000000000000000000000000000..99da2ac887ea9c29e1ee18a79f71bb302422a029 Binary files /dev/null and b/docs/paddlex_gui/images/QR.jpg differ diff --git a/docs/paddlex_gui/images/ReadMe b/docs/paddlex_gui/images/ReadMe new file mode 100644 index 0000000000000000000000000000000000000000..dd1810a3fbed8abc389774ba8ecf63d456945f3c --- /dev/null +++ b/docs/paddlex_gui/images/ReadMe @@ -0,0 +1 @@ +PaddleX GUI Screen Shot diff --git a/docs/paddlex_gui/images/dataset2.jpg b/docs/paddlex_gui/images/dataset2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc50e82fc1f9368c7037e44e9966cdbd3473429e Binary files /dev/null and b/docs/paddlex_gui/images/dataset2.jpg differ diff --git a/docs/paddlex_gui/images/datasets1.jpg b/docs/paddlex_gui/images/datasets1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..03a78e9a6a6bde1d648171e00f22955a4d9d4b9b Binary files /dev/null and b/docs/paddlex_gui/images/datasets1.jpg differ diff --git a/docs/paddlex_gui/images/project1.jpg b/docs/paddlex_gui/images/project1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d5fa05999309bc495280e74c1f2750b026287c1b Binary files /dev/null and b/docs/paddlex_gui/images/project1.jpg differ diff --git a/docs/paddlex_gui/images/project2.jpg b/docs/paddlex_gui/images/project2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..35a14c9c2a36d046f0ba1323b5e13bc50de75c5a Binary files /dev/null and b/docs/paddlex_gui/images/project2.jpg differ diff --git a/docs/paddlex_gui/images/project3.jpg b/docs/paddlex_gui/images/project3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8d920cc65fce5e6ceb4b3a65b602b329a457e12e Binary files /dev/null and b/docs/paddlex_gui/images/project3.jpg differ diff --git a/docs/paddlex_gui/images/publish.jpg b/docs/paddlex_gui/images/publish.jpg new file mode 100644 index 0000000000000000000000000000000000000000..df4ac6b9630c5c77b696db49f1cd3678d00676bb Binary files /dev/null and b/docs/paddlex_gui/images/publish.jpg differ diff --git a/docs/paddlex_gui/images/visualization1.jpg b/docs/paddlex_gui/images/visualization1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..40bf8736d1f966ecb7c7a2ee3af49c690211a2e0 Binary files /dev/null and b/docs/paddlex_gui/images/visualization1.jpg differ diff --git a/docs/paddlex_gui/images/visualization2.jpg b/docs/paddlex_gui/images/visualization2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..84288e091148e681972d29edfc167a59e9eb50b4 Binary files /dev/null and b/docs/paddlex_gui/images/visualization2.jpg differ diff --git a/docs/paddlex_gui/images/visualization3.jpg b/docs/paddlex_gui/images/visualization3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a9ecb6d7cbd77979ea7c0d2f6bb3882fc5af17ff Binary files /dev/null and b/docs/paddlex_gui/images/visualization3.jpg differ diff --git a/docs/paddlex_gui/images/visualization4.jpg b/docs/paddlex_gui/images/visualization4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..10cc8feaabe8f5ab813621a5b0499e29cc9da7fe Binary files /dev/null and b/docs/paddlex_gui/images/visualization4.jpg 
differ diff --git a/docs/paddlex_gui/images/visualization5.jpg b/docs/paddlex_gui/images/visualization5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b326a85a1f9f5513d823e9f4535580cc5a948217 Binary files /dev/null and b/docs/paddlex_gui/images/visualization5.jpg differ diff --git a/docs/paddlex_gui/index.rst b/docs/paddlex_gui/index.rst index be17573f2540032b954a4b1b7a96fed5729e32a0..3fc80fc0afa4e87bf60f28140ebcce673e5ee283 100755 --- a/docs/paddlex_gui/index.rst +++ b/docs/paddlex_gui/index.rst @@ -21,7 +21,7 @@ PaddleX GUI是基于PaddleX开发实现的可视化模型训练套件,可以 how_to_use.md xx.md -* PaddleX版本: v0.1.7 +* PaddleX GUI版本: v1.0 * 项目官网: http://www.paddlepaddle.org.cn/paddle/paddlex * 项目GitHub: https://github.com/PaddlePaddle/PaddleX/tree/develop * 官方QQ用户群: 1045148026 diff --git a/docs/quick_start.md b/docs/quick_start.md index dafb902fc66a83de4f5aa6066dd6094b1baa4b86..a24cdadf410abd738750a18b4f5e99b8265cb7d3 100644 --- a/docs/quick_start.md +++ b/docs/quick_start.md @@ -61,7 +61,7 @@ eval_dataset = pdx.datasets.ImageNet( 本文档中使用百度基于蒸馏方法得到的MobileNetV3预训练模型,模型结构与MobileNetV3一致,但精度更高。PaddleX内置了20多种分类模型,查阅[PaddleX模型库](appendix/model_zoo.md)了解更多分类模型。 ``` num_classes = len(train_dataset.labels) -model.pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes) +model = pdx.cls.MobileNetV3_small_ssld(num_classes=num_classes) ``` ### 3.4 定义训练参数 @@ -86,7 +86,7 @@ python train.py ## 5. 训练过程中查看训练指标 模型在训练过程中,所有的迭代信息将以标准输出流的形式,输出到命令执行的终端上,用户也可通过visualdl以可视化的方式查看训练指标的变化,通过如下方式启动visualdl后,在浏览器打开http://0.0.0.0:8001 (或 http://localhost:8001)即可。 ``` -visualdl --logdir output/mobilenetv2/vdl_log --port 8000 +visualdl --logdir output/mobilenetv2/vdl_log --port 8001 ``` ![](./images/vdl1.jpg) diff --git a/docs/slim/index.rst b/docs/slim/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..48a16f6e08f3f80a7048d1666719b9b08e150362 --- /dev/null +++ b/docs/slim/index.rst @@ -0,0 +1,8 @@ +模型压缩 +============================ + +.. 
toctree:: + :maxdepth: 2 + + prune.md + quant.md diff --git a/docs/slim/prune.md b/docs/slim/prune.md new file mode 100644 index 0000000000000000000000000000000000000000..c1ff51e5e08c2ce8da5e2042d0a1c359a9e64dff --- /dev/null +++ b/docs/slim/prune.md @@ -0,0 +1,54 @@ +# 模型裁剪 + +## 原理介绍 + +模型裁剪用于减小模型的计算量和体积,可以加快模型部署后的预测速度,是一种减小模型大小和降低模型计算复杂度的常用方式,通过裁剪卷积层中Kernel输出通道的大小及其关联层参数大小来实现,其关联裁剪的原理可参见[PaddleSlim相关文档](https://paddlepaddle.github.io/PaddleSlim/algo/algo.html#id16)。**一般而言,在同等模型精度前提下,数据复杂度越低,模型可以被裁剪的比例就越高**。 + +## 裁剪方法 +PaddleX提供了两种方式: + +**1.用户自行计算裁剪配置(推荐),整体流程包含三个步骤,** +> **第一步**: 使用数据集训练原始模型 +> **第二步**:利用第一步训练好的模型,在验证数据集上计算模型中各个参数的敏感度,并将敏感度信息存储至本地文件 +> **第三步**:使用数据集训练裁剪模型(与第一步差异在于需要在`train`接口中,将第二步计算得到的敏感度信息文件传给接口的`sensitivities_file`参数) + +> 在如上三个步骤中,**相当于模型共需要训练两遍**,分别对应第一步和第三步,但其中第三步训练的是裁剪后的模型,因此训练速度较第一步会更快。 +> 第二步会遍历模型中的部分裁剪参数,分别计算各个参数裁剪后对于模型在验证集上效果的影响,**因此会反复在验证集上评估多次**。 + +**2.使用PaddleX内置的裁剪方案** +> PaddleX内置的模型裁剪方案是**基于标准数据集**上计算得到的参数敏感度信息,由于不同数据集特征分布会有较大差异,所以该方案相较于第1种方案训练得到的模型**精度一般而言会更低**(**且用户自定义数据集与标准数据集特征分布差异越大,训练得到的模型精度越低**),仅在用户想节省时间的前提下可以参考使用,使用方式只需一步, + +> **一步**: 使用数据集训练裁剪模型,在训练调用`train`接口时,将接口中的`sensitivities_file`参数设置为'DEFAULT'字符串(完整的调用示意见本文档末尾的示例代码) + +> 注:各模型内置的裁剪方案分别依据的数据集为: 图像分类——ImageNet数据集、目标检测——PascalVOC数据集、语义分割——CityScape数据集 + +## 裁剪实验 +基于上述两种方案,我们在PaddleX上使用样例数据进行了实验,在Tesla P40上实验指标如下所示: + +### 图像分类 +实验背景:使用MobileNetV2模型,数据集为蔬菜分类示例数据,见[使用教程-模型压缩-图像分类](../tutorials/compress/classification.md) + +| 模型 | 裁剪情况 | 模型大小 | Top1准确率(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- |:----------| +|MobileNetV2 | 无裁剪(原模型)| 13.0M | 97.50|6.47ms |47.44ms | +|MobileNetV2 | 方案一(eval_metric_loss=0.10) | 2.1M | 99.58 |5.03ms |20.22ms | +|MobileNetV2 | 方案二(eval_metric_loss=0.10) | 6.0M | 99.58 |5.42ms |29.06ms | + +### 目标检测 +实验背景:使用YOLOv3-MobileNetV1模型,数据集为昆虫检测示例数据,见[使用教程-模型压缩-目标检测](../tutorials/compress/detection.md) + + +| 模型 | 裁剪情况 | 模型大小 | mAP(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- | :---------| +|YOLOv3-MobileNetV1 | 无裁剪(原模型)| 139M | 67.57| 14.88ms |976.42ms | +|YOLOv3-MobileNetV1 | 方案一(eval_metric_loss=0.10) | 34M | 75.49 |10.60ms |558.49ms | +|YOLOv3-MobileNetV1 | 方案二(eval_metric_loss=0.05) | 29M | 50.27| 9.43ms |360.46ms | + +### 语义分割 +实验背景:使用UNet模型,数据集为视盘分割示例数据, 见[使用教程-模型压缩-语义分割](../tutorials/compress/segmentation.md) + +| 模型 | 裁剪情况 | 模型大小 | mIoU(%) |GPU预测速度 | CPU预测速度 | +| :-----| :--------| :-------- | :---------- |:---------- | :---------| +|UNet | 无裁剪(原模型)| 77M | 91.22 |33.28ms |9523.55ms | +|UNet | 方案一(eval_metric_loss=0.10) |26M | 90.37 |21.04ms |3936.20ms | +|UNet | 方案二(eval_metric_loss=0.10) |23M | 91.21 |18.61ms |3447.75ms |
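
## 使用示例

下面以上文图像分类实验的设定为例,给出方案二的最小调用示意(数据集与transforms的定义方式与正常训练完全一致;此处仅为示意代码,`train`接口的完整参数请以对应模型的train接口文档为准)。若使用方案一,只需将`sensitivities_file`改为第二步计算得到的敏感度信息文件路径。

```
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from paddlex.cls import transforms
import paddlex as pdx

# 数据集与transforms的定义与正常训练一致
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
train_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/train_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/val_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=eval_transforms)

model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    train_batch_size=32,
    eval_dataset=eval_dataset,
    learning_rate=0.025,
    save_dir='output/mobilenetv2_prune',
    # 方案二:设置为'DEFAULT'使用内置敏感度信息;方案一:传入第二步保存的敏感度信息文件路径
    sensitivities_file='DEFAULT',
    # 可容忍的精度损失,对应上文实验中的eval_metric_loss
    eval_metric_loss=0.10,
    use_vdl=True)
```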
diff --git a/docs/slim/quant.md b/docs/slim/quant.md new file mode 100644 index 0000000000000000000000000000000000000000..1686a9fb8d33e770d55a378ebdf76876058514fb --- /dev/null +++ b/docs/slim/quant.md @@ -0,0 +1,11 @@ +# 模型量化 + +## 原理介绍 +为了满足低内存带宽、低功耗、低计算资源占用以及低模型存储等需求,定点量化被提出。为此我们提供了训练后量化,该量化使用KL散度确定量化比例因子,将FP32模型转成INT8模型,且不需要重新训练,可以快速得到量化模型。 + + +## 使用PaddleX量化模型 +PaddleX提供了`export_quant_model`接口,让用户以接口的形式完成模型以post_quantization方式量化并导出。点击查看[量化接口使用文档](../apis/slim.md),调用示意见本文档末尾的示例代码。 + +## 量化性能对比 +模型量化后的性能对比指标请查阅[PaddleSlim模型库](https://paddlepaddle.github.io/PaddleSlim/model_zoo.html)
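
下面给出一个训练后量化的调用示意(以上文裁剪实验中的蔬菜分类MobileNetV2模型为例;`export_quant_model`的完整参数说明请以上述量化接口使用文档为准,此处的模型路径与保存路径均为假设):

```
import paddlex as pdx
from paddlex.cls import transforms

# 加载训练保存的模型(路径仅为示意)
model = pdx.load_model('output/mobilenetv2/best_model')

# 训练后量化需要一份评估数据集,用于确定量化比例因子
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
eval_dataset = pdx.datasets.ImageNet(
    data_dir='vegetables_cls',
    file_list='vegetables_cls/val_list.txt',
    label_list='vegetables_cls/labels.txt',
    transforms=eval_transforms)

# 以post_quantization方式量化模型并导出
pdx.slim.export_quant_model(model, eval_dataset, save_dir='./quant_mobilenetv2')
```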
diff --git a/docs/tuning_strategy/detection/index.rst b/docs/tuning_strategy/detection/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..5457adeeea053df4de9332bd4df61cd450830f96 --- /dev/null +++ b/docs/tuning_strategy/detection/index.rst @@ -0,0 +1,10 @@ +目标检测 +============================ + +PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。 + +.. toctree:: + :maxdepth: 1 + + negatives_training.md + diff --git a/docs/tuning_strategy/detection/negatives_training.md b/docs/tuning_strategy/detection/negatives_training.md new file mode 100644 index 0000000000000000000000000000000000000000..d3590e3222018faf90462935588a785b8fae4e7f --- /dev/null +++ b/docs/tuning_strategy/detection/negatives_training.md @@ -0,0 +1,101 @@ +# 通过负样本学习降低误检率 + +## 应用场景 + +在背景和目标相似的场景下,模型容易把背景误检成目标。为了降低误检率,可以采用负样本学习的策略,即在训练过程中把无目标真值的图片加入训练。 + +## 效果对比 + +* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**。 +* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**。 + +表1 违禁品验证集上**框级别精度**对比 + +||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)| +|:---|:---|:---| +|基准模型 | 45.8% | 83% | +|通过负样本学习后的模型 | 49.4% | 83.1% | + +表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比 + +||违禁品图片级别的召回率| 无违禁品图片级别的误检率| +|:---|:--------------------|:------------------------| +|基准模型 | 98.97% | 55.27% | +|通过负样本学习后的模型 | 97.75% | 5.59% | + +【名词解释】 + + * 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。 + + * 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。 + + +## 使用方法 + +在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下: + +``` +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '0' +from paddlex.det import transforms +import paddlex as pdx + +# 定义训练和验证时的transforms +train_transforms = transforms.ComposedRCNNTransforms( + mode='train', min_max_size=[600, 1000]) +eval_transforms = transforms.ComposedRCNNTransforms( + mode='eval', min_max_size=[600, 1000]) + +# 定义训练所用的数据集 +train_dataset = pdx.datasets.CocoDetection( + data_dir='jinnan2_round1_train_20190305/restricted/', + ann_file='jinnan2_round1_train_20190305/train.json', + transforms=train_transforms, + shuffle=True, + num_workers=2) +# 训练集中加入无目标背景图片 +train_dataset.add_negative_samples( + 'jinnan2_round1_train_20190305/normal_train_back/') + +# 定义验证所用的数据集 +eval_dataset = pdx.datasets.CocoDetection( + data_dir='jinnan2_round1_train_20190305/restricted/', + ann_file='jinnan2_round1_train_20190305/val.json', + transforms=eval_transforms, + num_workers=2) + +# 初始化模型,并进行训练 +model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1) +model.train( + num_epochs=17, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + train_batch_size=8, + learning_rate=0.01, + lr_decay_epochs=[13, 16], + save_dir='./output') +``` + +## 实验细则 + +(1) 数据集 + +我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。 + +* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。 + +* 训练集有883张违禁品图片,验证集有98张违禁品图片。 + +* 无违禁品的X光图片有2540张。 + +(2) 基准模型 + +使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。 + +(3) 通过负样本学习后的模型 + +把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。 + +通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集上的误检率仅有5.59%,与基准模型相比,**误检率降低了49.68%**。 + +此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。 diff --git a/docs/tuning_strategy/index.rst b/docs/tuning_strategy/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..f9d5cd50f914609f864135dfba922f857f771dbf --- /dev/null +++ b/docs/tuning_strategy/index.rst @@ -0,0 +1,7 @@ +PaddleX调优策略介绍 +============================ + +.. toctree:: + :maxdepth: 2 + + detection/index.rst diff --git a/docs/tutorials/dataset_prepare.md b/docs/tutorials/dataset_prepare.md index 87f368bc29d80aff12b9f6597a1d3901b427c4d7..95d1094c216857d4dc708cf39be74dca98d78f59 100644 --- a/docs/tutorials/dataset_prepare.md +++ b/docs/tutorials/dataset_prepare.md @@ -1,7 +1,3 @@ # 数据准备 -## 数据标注 - -## 主流标注软件支持 - -## EasyData数据标注支持 +该部分内容已迁移至[附录](../appendix/datasets.md) diff --git a/docs/tutorials/deploy/deploy_lite.md b/docs/tutorials/deploy/deploy_lite.md index 392e945dea2465ca4f6f40f2a131f7cad19db03a..5419aed636545b95e9f98fdd45109592b7a6d9d6 100644 --- a/docs/tutorials/deploy/deploy_lite.md +++ b/docs/tutorials/deploy/deploy_lite.md @@ -1,5 +1,12 @@ # 移动端部署 +PaddleX的移动端部署由PaddleLite实现,部署流程如下:首先将训练好的模型导出为inference model,然后使用PaddleLite的python接口对模型进行优化,最后使用PaddleLite的预测库进行部署。 +PaddleLite的详细介绍和使用可参考:[PaddleLite文档](https://paddle-lite.readthedocs.io/zh/latest/) + +> PaddleX --> Inference Model --> PaddleLite Opt --> PaddleLite Inference + +以下介绍如何将PaddleX模型导出为inference model,然后使用PaddleLite的OPT模块对模型进行优化: + step 1: 安装PaddleLite ``` @@ -9,14 +16,21 @@ pip install paddlelite step 2: 将PaddleX模型导出为inference模型 参考[导出inference模型](deploy_server/deploy_python.html#inference)将模型导出为inference格式模型。 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](./upgrade_version.md)对模型版本进行升级。** step 3: 将inference模型转换成PaddleLite模型 ``` -python /path/to/PaddleX/deploy/lite/export_lite.py --model_path /path/to/inference_model --save_dir /path/to/onnx_model +python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model --place place/to/run + ``` -`--model_path`用于指定inference模型的路径,`--save_dir`用于指定Lite模型的保存路径。 +| 参数 | 说明 | +| ---- | ---- | +| model_dir | 预测模型所在路径,包含"__model__", "__params__"文件 | +| save_file | 输出的Lite模型的文件名,默认为"paddlex.nb" | +| place | 运行的平台,可选:arm/opencl/x86/npu/xpu/rknpu/apu |
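
例如,假设导出的inference模型位于`./inference_model`,需要转换出在arm平台上运行的Lite模型,可执行如下命令(路径与文件名仅为示意):

```
python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir ./inference_model --save_file ./paddlex.nb --place arm
```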
+ step 4: 预测 diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md index b4edf3510ae992d72ea60e1078f22e12d54357c2..838195f14ff108de838f04d5514101b17280f4dd 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md @@ -96,6 +96,17 @@ cmake .. \ make ``` +**注意:** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载: + +- [opencv3gcc4.8.tar.bz2](https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2) +- [paddlex-encryption.zip](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip) +- [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) + +opencv3gcc4.8.tar.bz2文件下载后解压,然后在script/build.sh中指定`OPENCV_DIR`为解压后的路径。 + +paddlex-encryption.zip文件下载后解压,然后在script/build.sh中指定`ENCRYPTION_DIR`为解压后的路径。 + +yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。 修改脚本设置好主要参数后,执行`build`脚本: ```shell @@ -104,7 +115,9 @@ make ### Step5: 预测及可视化 -参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 +**在加载模型前,请检查你的模型目录,目录中应包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** + +> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifier`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: @@ -116,7 +129,7 @@ make | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | -| save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 | +| save_dir | 保存可视化结果的路径, 默认值为"output",**classifier无该参数** | ## 样例 diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md index 2f7c62766291410ec8e48a77b7e814edeb1523bb..86c93e63f12f7e98b73cd0bd4e02b3cdc66d21a7 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md @@ -86,7 +86,14 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens | OPENCV_DIR | OpenCV的安装路径, | | PADDLE_DIR | Paddle c++预测库的路径 | -**注意:** 1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾 2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾 +**注意:** +1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾 + +2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾 + +3. 
Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip) + +yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。 ![step4](../../images/vs2019_step5.png) @@ -99,7 +106,10 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens ### Step5: 预测及可视化 -参考[导出inference模型](../deploy_python.html#inference)将模型导出为inference格式模型。 + +**在加载模型前,请检查你的模型目录,目录中应包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** + +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: @@ -122,7 +132,7 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release ## 样例 -可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。 +可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测。 `样例一`: diff --git a/docs/tutorials/deploy/deploy_server/deploy_python.md b/docs/tutorials/deploy/deploy_server/deploy_python.md index c597f87cdbbc208ad2b72a8305642da41b9be5cd..321d48077fd0478234e8ce6386c7355c36d1c63c 100644 --- a/docs/tutorials/deploy/deploy_server/deploy_python.md +++ b/docs/tutorials/deploy/deploy_server/deploy_python.md @@ -20,6 +20,8 @@ paddlex --export_inference --model_dir=./xiaoduxiong_epoch_12 --save_dir=./infer ``` ## 预测部署 +**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。** + > 点击下载测试图片 [xiaoduxiong_test_image.tar.gz](https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_test_image.tar.gz) ``` diff --git a/docs/tutorials/deploy/deploy_server/encryption.md b/docs/tutorials/deploy/deploy_server/encryption.md index 71b07048ed8372b4c4b9aa0b2082dc9ed9f2f9a0..7090421823bb3bbe2017818a3fc2f7e96608dda9 100644 --- a/docs/tutorials/deploy/deploy_server/encryption.md +++ b/docs/tutorials/deploy/deploy_server/encryption.md @@ -61,7 +61,7 @@ paddlex-encryption ./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model ``` -`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` +`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`(**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` ![](../images/encrypt.png) diff --git a/docs/tutorials/deploy/upgrade_version.md b/docs/tutorials/deploy/upgrade_version.md new file mode 100644 index 0000000000000000000000000000000000000000..aac33928448d75bf6965fbadbd4ff114e6156196 --- /dev/null +++ b/docs/tutorials/deploy/upgrade_version.md @@ -0,0 +1,14 @@ +# 模型版本升级 + +由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,用户需要按照以下步骤对模型版本进行转换,转换后的模型可以在多端上完成部署。 + +## 检查模型版本 + +存放模型的文件夹中存有一个`model.yml`文件,该文件最后一行的`version`值表示模型的版本号。若版本号小于1.0.0,则需要进行版本转换;若版本号大于或等于1.0.0,则不需要进行版本转换。
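
如下为一段读取`model.yml`中`version`字段的示意代码(假设模型目录为`./xiaoduxiong_epoch_12`):

```
import yaml

# 读取模型目录下的model.yml,查看version字段
with open('./xiaoduxiong_epoch_12/model.yml') as f:
    info = yaml.load(f.read(), Loader=yaml.Loader)
print(info['version'])  # 若低于1.0.0,需按下述命令进行版本转换
```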
+ +## 版本转换 + +``` +paddlex --export_inference --model_dir=/path/to/low_version_model --save_dir=/path/to/high_version_model +``` +`--model_dir`为版本号小于1.0.0的模型路径,可以是PaddleX训练过程保存的模型,也可以是导出为inference格式的模型。`--save_dir`为转换后的高版本模型的保存路径,转换后的模型可用于多端部署。 \ No newline at end of file diff --git a/new_tutorials/train/README.md b/new_tutorials/train/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fc319d16d0c795f856600355d43c18ef413eae0e --- /dev/null +++ b/new_tutorials/train/README.md @@ -0,0 +1,21 @@ +# 使用教程——训练模型 + +本目录下整理了使用PaddleX训练模型的示例代码,代码中均提供了示例数据的自动下载,并均使用单张GPU卡进行训练。 + +|代码 | 模型任务 | 数据 | +|------|--------|---------| +|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 | +|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 | +|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 | +|detection/mask_rcnn_r50_fpn.py | 实例分割MaskRCNN | 小度熊分拣 | +|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 | +|segmentation/unet.py | 语义分割UNet | 视盘分割 | +|segmentation/hrnet.py | 语义分割HRNet | 视盘分割 | +|segmentation/fast_scnn.py | 语义分割FastSCNN | 视盘分割 | + + +## 开始训练 +在安装PaddleX后,使用如下命令开始训练 +``` +python classification/mobilenetv2.py +``` diff --git a/new_tutorials/train/classification/mobilenetv2.py b/new_tutorials/train/classification/mobilenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..9a075526a3cbb7e560c133f08faef68ea5a07121 --- /dev/null +++ b/new_tutorials/train/classification/mobilenetv2.py @@ -0,0 +1,47 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from paddlex.cls import transforms +import paddlex as pdx + +# 下载和解压蔬菜分类数据集 +veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' +pdx.utils.download_and_decompress(veg_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms +train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) +eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet +train_dataset = pdx.datasets.ImageNet( + data_dir='vegetables_cls', + file_list='vegetables_cls/train_list.txt', + label_list='vegetables_cls/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.ImageNet( + data_dir='vegetables_cls', + file_list='vegetables_cls/val_list.txt', + label_list='vegetables_cls/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001 +# 浏览器打开 http://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#mobilenetv2 +model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels)) +model.train( + num_epochs=10, + train_dataset=train_dataset, + train_batch_size=32, + eval_dataset=eval_dataset, + lr_decay_epochs=[4, 6, 8], + learning_rate=0.025, + save_dir='output/mobilenetv2', + use_vdl=True) diff --git a/new_tutorials/train/classification/resnet50.py b/new_tutorials/train/classification/resnet50.py new file mode 100644 index 0000000000000000000000000000000000000000..bf56a605f1c3376057c1ab9283fa1251491b2750 --- /dev/null +++ b/new_tutorials/train/classification/resnet50.py @@ -0,0 +1,56 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddle.fluid as fluid +from paddlex.cls import transforms +import paddlex as pdx + +# 下载和解压蔬菜分类数据集 +veg_dataset = 
'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' +pdx.utils.download_and_decompress(veg_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms +train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) +eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet +train_dataset = pdx.datasets.ImageNet( + data_dir='vegetables_cls', + file_list='vegetables_cls/train_list.txt', + label_list='vegetables_cls/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.ImageNet( + data_dir='vegetables_cls', + file_list='vegetables_cls/val_list.txt', + label_list='vegetables_cls/labels.txt', + transforms=eval_transforms) + +# PaddleX支持自定义构建优化器 +step_each_epoch = train_dataset.num_samples // 32 +learning_rate = fluid.layers.cosine_decay( + learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10) +optimizer = fluid.optimizer.Momentum( + learning_rate=learning_rate, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(4e-5)) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50 +model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels)) +model.train( + num_epochs=10, + train_dataset=train_dataset, + train_batch_size=32, + eval_dataset=eval_dataset, + optimizer=optimizer, + save_dir='output/resnet50', + use_vdl=True) diff --git a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..a64b711c3af48cb85cfd8a82938785ca386a99ec --- /dev/null +++ b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py @@ -0,0 +1,49 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from paddlex.det import transforms +import paddlex as pdx + +# 下载和解压昆虫检测数据集 +insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz' +pdx.utils.download_and_decompress(insect_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms +train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) +eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection +train_dataset = pdx.datasets.VOCDetection( + data_dir='insect_det', + file_list='insect_det/train_list.txt', + label_list='insect_det/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.VOCDetection( + data_dir='insect_det', + file_list='insect_det/val_list.txt', + label_list='insect_det/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP +# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn +num_classes = 
len(train_dataset.labels) + 1 +model = pdx.det.FasterRCNN(num_classes=num_classes) +model.train( + num_epochs=12, + train_dataset=train_dataset, + train_batch_size=2, + eval_dataset=eval_dataset, + learning_rate=0.0025, + lr_decay_epochs=[8, 11], + save_dir='output/faster_rcnn_r50_fpn', + use_vdl=True) diff --git a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..f2ebf6e20f18054bf16452eb6e60b9ea24f20748 --- /dev/null +++ b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py @@ -0,0 +1,48 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from paddlex.det import transforms +import paddlex as pdx + +# 下载和解压小度熊分拣数据集 +xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz' +pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms +train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) +eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection +train_dataset = pdx.datasets.CocoDetection( + data_dir='xiaoduxiong_ins_det/JPEGImages', + ann_file='xiaoduxiong_ins_det/train.json', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.CocoDetection( + data_dir='xiaoduxiong_ins_det/JPEGImages', + ann_file='xiaoduxiong_ins_det/val.json', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001 +# 浏览器打开 http://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP +# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn +num_classes = len(train_dataset.labels) + 1 +model = pdx.det.MaskRCNN(num_classes=num_classes) +model.train( + num_epochs=12, + train_dataset=train_dataset, + train_batch_size=1, + eval_dataset=eval_dataset, + learning_rate=0.00125, + warmup_steps=10, + lr_decay_epochs=[8, 11], + save_dir='output/mask_rcnn_r50_fpn', + use_vdl=True) diff --git a/new_tutorials/train/detection/yolov3_darknet53.py b/new_tutorials/train/detection/yolov3_darknet53.py new file mode 100644 index 0000000000000000000000000000000000000000..8027a506458aac94de82a915aa8b058d71ba97f7 --- /dev/null +++ b/new_tutorials/train/detection/yolov3_darknet53.py @@ -0,0 +1,48 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from paddlex.det import transforms +import paddlex as pdx + +# 下载和解压昆虫检测数据集 +insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz' +pdx.utils.download_and_decompress(insect_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms +train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608]) +eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eval', shape=[608, 608]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection +train_dataset = pdx.datasets.VOCDetection( + data_dir='insect_det', + file_list='insect_det/train_list.txt', + label_list='insect_det/labels.txt', 
transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.VOCDetection( + data_dir='insect_det', + file_list='insect_det/val_list.txt', + label_list='insect_det/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3 +num_classes = len(train_dataset.labels) +model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53') +model.train( + num_epochs=270, + train_dataset=train_dataset, + train_batch_size=8, + eval_dataset=eval_dataset, + learning_rate=0.000125, + lr_decay_epochs=[210, 240], + save_dir='output/yolov3_darknet53', + use_vdl=True) diff --git a/new_tutorials/train/segmentation/deeplabv3p.py b/new_tutorials/train/segmentation/deeplabv3p.py new file mode 100644 index 0000000000000000000000000000000000000000..cb18fcfad65331d02b04abe3c3a76fa0356fb5b8 --- /dev/null +++ b/new_tutorials/train/segmentation/deeplabv3p.py @@ -0,0 +1,51 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddlex as pdx +from paddlex.seg import transforms + +# 下载和解压视盘分割数据集 +optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' +pdx.utils.download_and_decompress(optic_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') + +train_transforms.add_augmenters([ + transforms.RandomRotate() +]) + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset +train_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/train_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/val_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p +num_classes = len(train_dataset.labels) +model = pdx.seg.DeepLabv3p(num_classes=num_classes) +model.train( + num_epochs=40, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + learning_rate=0.01, + save_dir='output/deeplab', + use_vdl=True) diff --git a/new_tutorials/train/segmentation/fast_scnn.py b/new_tutorials/train/segmentation/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..53f1a528a090d6d4f278e47b54b2660dccde2e0d --- /dev/null +++ b/new_tutorials/train/segmentation/fast_scnn.py @@ -0,0 +1,48 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddlex as pdx +from paddlex.seg import transforms + +# 下载和解压视盘分割数据集 +optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' +pdx.utils.download_and_decompress(optic_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: 
https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms( + mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset +train_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/train_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/val_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet +num_classes = len(train_dataset.labels) +model = pdx.seg.FastSCNN(num_classes=num_classes) +model.train( + num_epochs=20, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + learning_rate=0.01, + save_dir='output/fastscnn', + use_vdl=True) diff --git a/new_tutorials/train/segmentation/hrnet.py b/new_tutorials/train/segmentation/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..98fdd1b925bd4707001fdad56b3ffdc6bb2b58ae --- /dev/null +++ b/new_tutorials/train/segmentation/hrnet.py @@ -0,0 +1,47 @@ +import os +# 选择使用0号卡 +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddlex as pdx +from paddlex.seg import transforms + +# 下载和解压视盘分割数据集 +optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' +pdx.utils.download_and_decompress(optic_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset +train_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/train_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/val_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet +num_classes = len(train_dataset.labels) +model = pdx.seg.HRNet(num_classes=num_classes) +model.train( + num_epochs=20, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + learning_rate=0.01, + save_dir='output/hrnet', + use_vdl=True) diff --git a/new_tutorials/train/segmentation/unet.py b/new_tutorials/train/segmentation/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..ddf4f7991a690b0d0d506967df0c140f60945e85 --- /dev/null +++ b/new_tutorials/train/segmentation/unet.py @@ -0,0 +1,47 @@ +import os +# 选择使用0号卡 
+os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +import paddlex as pdx +from paddlex.seg import transforms + +# 下载和解压视盘分割数据集 +optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' +pdx.utils.download_and_decompress(optic_dataset, path='./') + +# 定义训练和验证时的transforms +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms +train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) +eval_transforms = transforms.ComposedSegTransforms(mode='eval') + +# 定义训练和验证所用的数据集 +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset +train_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/train_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=train_transforms, + shuffle=True) +eval_dataset = pdx.datasets.SegDataset( + data_dir='optic_disc_seg', + file_list='optic_disc_seg/val_list.txt', + label_list='optic_disc_seg/labels.txt', + transforms=eval_transforms) + +# 初始化模型,并进行训练 +# 可使用VisualDL查看训练指标 +# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 +# 浏览器打开 https://0.0.0.0:8001即可 +# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP + +# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet +num_classes = len(train_dataset.labels) +model = pdx.seg.UNet(num_classes=num_classes) +model.train( + num_epochs=20, + train_dataset=train_dataset, + train_batch_size=4, + eval_dataset=eval_dataset, + learning_rate=0.01, + save_dir='output/unet', + use_vdl=True) diff --git a/paddlex/__init__.py b/paddlex/__init__.py index de762df7ad7dc01670e795b93f709bb23a08f1c8..b80363f2e6adfdbd6ce712cfec486540753abbb7 100644 --- a/paddlex/__init__.py +++ b/paddlex/__init__.py @@ -53,4 +53,4 @@ log_level = 2 from . 
import interpret -__version__ = '1.0.2.github' +__version__ = '1.0.6' diff --git a/paddlex/cls.py b/paddlex/cls.py index e440c726b639ac6d78cc3c62dd8ef2df7bf8a327..90c5eefce512c966a04975ebfe6457613012c872 100644 --- a/paddlex/cls.py +++ b/paddlex/cls.py @@ -36,5 +36,7 @@ DenseNet121 = cv.models.DenseNet121 DenseNet161 = cv.models.DenseNet161 DenseNet201 = cv.models.DenseNet201 ShuffleNetV2 = cv.models.ShuffleNetV2 +HRNet_W18 = cv.models.HRNet_W18 +AlexNet = cv.models.AlexNet transforms = cv.transforms.cls_transforms diff --git a/paddlex/cv/datasets/coco.py b/paddlex/cv/datasets/coco.py index ff7c2b2d2438fb88d359c94f9ede65d900d9216e..97e791be5ed3cac1656fba4429d90f1653bfe1be 100644 --- a/paddlex/cv/datasets/coco.py +++ b/paddlex/cv/datasets/coco.py @@ -128,7 +128,6 @@ class CocoDetection(VOCDetection): coco_rec = (im_info, label_info) self.file_list.append([im_fname, coco_rec]) - if not len(self.file_list) > 0: raise Exception('not found any coco record in %s' % (ann_file)) logging.info("{} samples in file {}".format( diff --git a/paddlex/cv/datasets/dataset.py b/paddlex/cv/datasets/dataset.py index c3bec8997cb0a04590f4946dc87b0eb8bd3a0c43..3cc8b52c10f41ce17bbe8da7fd5289b96aac409e 100644 --- a/paddlex/cv/datasets/dataset.py +++ b/paddlex/cv/datasets/dataset.py @@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data): padding_batch = [] for data in batch_data: im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) + padding_im = np.zeros( + (im_c, max_shape[1], max_shape[2]), dtype=np.float32) + padding_im[:, :im_h, :im_w] = data[0] padding_batch.append((padding_im, ) + data[1:]) return padding_batch @@ -226,8 +226,8 @@ class Dataset: if num_workers == 'auto': import multiprocessing as mp num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 - if platform.platform().startswith( - "Darwin") or platform.platform().startswith("Windows"): + if platform.platform().startswith("Darwin") or platform.platform( + ).startswith("Windows"): parallel_method = 'thread' if transforms is None: raise Exception("transform should be defined.") diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py index 0e0ad353250ef176cc7937f998b6edfce4fa7919..6722f296004d45f4c93cce61c1a3a94408daf17d 100644 --- a/paddlex/cv/datasets/voc.py +++ b/paddlex/cv/datasets/voc.py @@ -14,6 +14,7 @@ from __future__ import absolute_import import copy +import os import os.path as osp import random import re @@ -122,9 +123,9 @@ class VOCDetection(Dataset): is_crowd = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32) for i, obj in enumerate(objs): pattern = re.compile('<name>', re.IGNORECASE) name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] cname = obj.find(name_tag).text.strip() gt_class[i][0] = cname2cid[cname] pattern = re.compile('<difficult>', re.IGNORECASE) diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1] @@ -197,6 +202,44 @@ class VOCDetection(Dataset): self.coco_gt.dataset = annotations self.coco_gt.createIndex() + def add_negative_samples(self, image_dir): + import cv2 + if not osp.exists(image_dir): + raise Exception("{} background images directory does not exist.". 
+ format(image_dir)) + image_list = os.listdir(image_dir) + max_img_id = max(self.coco_gt.getImgIds()) + for image in image_list: + if not is_pic(image): + continue + # False ground truth + gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32) + gt_class = np.array([[0]], dtype=np.int32) + gt_score = np.ones((1, 1), dtype=np.float32) + is_crowd = np.array([[0]], dtype=np.int32) + difficult = np.zeros((1, 1), dtype=np.int32) + gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]] + + max_img_id += 1 + im_fname = osp.join(image_dir, image) + img_data = cv2.imread(im_fname) + im_h, im_w, im_c = img_data.shape + im_info = { + 'im_id': np.array([max_img_id]).astype('int32'), + 'image_shape': np.array([im_h, im_w]).astype('int32'), + } + label_info = { + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_score': gt_score, + 'difficult': difficult, + 'gt_poly': gt_poly + } + coco_rec = (im_info, label_info) + self.file_list.append([im_fname, coco_rec]) + self.num_samples = len(self.file_list) + def iterator(self): self._epoch += 1 self._pos = 0 diff --git a/paddlex/cv/models/__init__.py b/paddlex/cv/models/__init__.py index 6c75179f893c286be9c00dc98d96ee1768e4a3ec..1c7e4b35bc7387c3f5c536e74edc0feafa1811d9 100644 --- a/paddlex/cv/models/__init__.py +++ b/paddlex/cv/models/__init__.py @@ -34,11 +34,15 @@ from .classifier import DenseNet121 from .classifier import DenseNet161 from .classifier import DenseNet201 from .classifier import ShuffleNetV2 +from .classifier import HRNet_W18 +from .classifier import AlexNet from .base import BaseAPI from .yolo_v3 import YOLOv3 from .faster_rcnn import FasterRCNN from .mask_rcnn import MaskRCNN from .unet import UNet from .deeplabv3p import DeepLabv3p +from .hrnet import HRNet +from .fast_scnn import FastSCNN from .load_model import load_model from .slim import prune diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py index 9d66df7009ff4daf09112b4709e30c39eb38ab67..e30a2529c5a7ff9cbcafb4a05d58f53ea5476e7e 100644 --- a/paddlex/cv/models/base.py +++ b/paddlex/cv/models/base.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from __future__ import absolute_import import paddle.fluid as fluid @@ -79,9 +79,9 @@ class BaseAPI: return int(batch_size // len(self.places)) else: raise Exception("Please support correct batch_size, \ - which can be divided by available cards({}) in {}". - format(paddlex.env_info['num'], - paddlex.env_info['place'])) + which can be divided by available cards({}) in {}" + .format(paddlex.env_info['num'], paddlex.env_info[ + 'place'])) def build_program(self): # 构建训练网络 @@ -194,12 +194,37 @@ class BaseAPI: if os.path.exists(pretrain_dir): os.remove(pretrain_dir) os.makedirs(pretrain_dir) + if pretrain_weights is not None and not os.path.exists( + pretrain_weights): + if self.model_type == 'classifier': + if pretrain_weights not in ['IMAGENET']: + logging.warning( + "Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'". + format(pretrain_weights)) + pretrain_weights = 'IMAGENET' + elif self.model_type == 'detector': + if pretrain_weights not in ['IMAGENET', 'COCO']: + logging.warning( + "Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'". + format(pretrain_weights)) + pretrain_weights = 'IMAGENET' + elif self.model_type == 'segmenter': + if pretrain_weights not in [ + 'IMAGENET', 'COCO', 'CITYSCAPES' + ]: + logging.warning( + "Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'". + format(pretrain_weights)) + pretrain_weights = 'IMAGENET' if hasattr(self, 'backbone'): backbone = self.backbone else: backbone = self.__class__.__name__ + if backbone == "HRNet": + backbone = backbone + "_W{}".format(self.width) + class_name = self.__class__.__name__ pretrain_weights = get_pretrain_weights( - pretrain_weights, self.model_type, backbone, pretrain_dir) + pretrain_weights, class_name, backbone, pretrain_dir) if startup_prog is None: startup_prog = fluid.default_startup_program() self.exe.run(startup_prog) @@ -210,8 +235,8 @@ class BaseAPI: paddlex.utils.utils.load_pretrain_weights( self.exe, self.train_prog, resume_checkpoint, resume=True) if not osp.exists(osp.join(resume_checkpoint, "model.yml")): - raise Exception( - "There's not model.yml in {}".format(resume_checkpoint)) + raise Exception("There's not model.yml in {}".format( + resume_checkpoint)) with open(osp.join(resume_checkpoint, "model.yml")) as f: info = yaml.load(f.read(), Loader=yaml.Loader) self.completed_epochs = info['completed_epochs'] @@ -260,6 +285,7 @@ class BaseAPI: info['_Attributes']['num_classes'] = self.num_classes info['_Attributes']['labels'] = self.labels + info['_Attributes']['fixed_input_shape'] = self.fixed_input_shape try: primary_metric_key = list(self.eval_metrics.keys())[0] primary_metric_value = float(self.eval_metrics[primary_metric_key]) @@ -269,13 +295,13 @@ class BaseAPI: except: pass - if hasattr(self.test_transforms, 'to_rgb'): - if self.test_transforms.to_rgb: - info['TransformsMode'] = 'RGB' - else: - info['TransformsMode'] = 'BGR' - if hasattr(self, 'test_transforms'): + if hasattr(self.test_transforms, 'to_rgb'): + if self.test_transforms.to_rgb: + info['TransformsMode'] = 'RGB' + else: + info['TransformsMode'] = 'BGR' + if self.test_transforms is not None: info['Transforms'] = list() for op in self.test_transforms.transforms: @@ -362,8 +388,8 @@ class BaseAPI: 
# 模型保存成功的标志 open(osp.join(save_dir, '.success'), 'w').close() - logging.info( - "Model for inference deploy saved in {}.".format(save_dir)) + logging.info("Model for inference deploy saved in {}.".format( + save_dir)) def train_loop(self, num_epochs, @@ -377,7 +403,8 @@ class BaseAPI: early_stop=False, early_stop_patience=5): if train_dataset.num_samples < train_batch_size: - raise Exception('The amount of training datset must be larger than batch size.') + raise Exception( + 'The amount of training datset must be larger than batch size.') if not osp.isdir(save_dir): if osp.exists(save_dir): os.remove(save_dir) @@ -415,8 +442,8 @@ class BaseAPI: build_strategy=build_strategy, exec_strategy=exec_strategy) - total_num_steps = math.floor( - train_dataset.num_samples / train_batch_size) + total_num_steps = math.floor(train_dataset.num_samples / + train_batch_size) num_steps = 0 time_stat = list() time_train_one_epoch = None @@ -430,8 +457,8 @@ class BaseAPI: if self.model_type == 'detector': eval_batch_size = self._get_single_card_bs(train_batch_size) if eval_dataset is not None: - total_num_steps_eval = math.ceil( - eval_dataset.num_samples / eval_batch_size) + total_num_steps_eval = math.ceil(eval_dataset.num_samples / + eval_batch_size) if use_vdl: # VisualDL component @@ -473,7 +500,9 @@ class BaseAPI: if use_vdl: for k, v in step_metrics.items(): - log_writer.add_scalar('Metrics/Training(Step): {}'.format(k), v, num_steps) + log_writer.add_scalar( + 'Metrics/Training(Step): {}'.format(k), v, + num_steps) # 估算剩余时间 avg_step_time = np.mean(time_stat) @@ -481,11 +510,12 @@ class BaseAPI: eta = (num_epochs - i - 1) * time_train_one_epoch + ( total_num_steps - step - 1) * avg_step_time else: - eta = ((num_epochs - i) * total_num_steps - step - - 1) * avg_step_time + eta = ((num_epochs - i) * total_num_steps - step - 1 + ) * avg_step_time if time_eval_one_epoch is not None: - eval_eta = (total_eval_times - i // - save_interval_epochs) * time_eval_one_epoch + eval_eta = ( + total_eval_times - i // save_interval_epochs + ) * time_eval_one_epoch else: eval_eta = ( total_eval_times - i // save_interval_epochs @@ -495,16 +525,18 @@ class BaseAPI: logging.info( "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}" .format(i + 1, num_epochs, step + 1, total_num_steps, - dict2str(step_metrics), round( - avg_step_time, 2), eta_str)) + dict2str(step_metrics), + round(avg_step_time, 2), eta_str)) train_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), np.mean(records, axis=0))) + zip(list(self.train_outputs.keys()), np.mean( + records, axis=0))) logging.info('[TRAIN] Epoch {} finished, {} .'.format( i + 1, dict2str(train_metrics))) time_train_one_epoch = time.time() - epoch_start_time epoch_start_time = time.time() # 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存 + self.completed_epochs += 1 eval_epoch_start_time = time.time() if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1: current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1)) @@ -518,7 +550,6 @@ class BaseAPI: return_details=True) logging.info('[EVAL] Finished, Epoch={}, {} .'.format( i + 1, dict2str(self.eval_metrics))) - self.completed_epochs += 1 # 保存最优模型 best_accuracy_key = list(self.eval_metrics.keys())[0] current_accuracy = self.eval_metrics[best_accuracy_key] @@ -534,7 +565,8 @@ class BaseAPI: if isinstance(v, np.ndarray): if v.size > 1: continue - log_writer.add_scalar("Metrics/Eval(Epoch): {}".format(k), v, i+1) + log_writer.add_scalar( + "Metrics/Eval(Epoch): {}".format(k), v, i + 1) 
self.save_model(save_dir=current_save_dir) time_eval_one_epoch = time.time() - eval_epoch_start_time eval_epoch_start_time = time.time() @@ -545,4 +577,4 @@ class BaseAPI: best_accuracy)) if eval_dataset is not None and early_stop: if earlystop(current_accuracy): - break \ No newline at end of file + break diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py index b474ceeb4bf067ecade50acd9da66960216486ad..17a307d8bdeed77467535bec1216cc9b97bd70e3 100644 --- a/paddlex/cv/models/classifier.py +++ b/paddlex/cv/models/classifier.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import numpy as np @@ -40,20 +40,21 @@ class BaseClassifier(BaseAPI): self.init_params = locals() super(BaseClassifier, self).__init__('classifier') if not hasattr(paddlex.cv.nets, str.lower(model_name)): - raise Exception( - "ERROR: There's no model named {}.".format(model_name)) + raise Exception("ERROR: There's no model named {}.".format( + model_name)) self.model_name = model_name self.labels = None self.num_classes = num_classes self.fixed_input_shape = None def build_net(self, mode='train'): + if self.__class__.__name__ == "AlexNet": + assert self.fixed_input_shape is not None, "In AlexNet, input_shape should be defined, e.g. 
model = paddlex.cls.AlexNet(num_classes=1000, input_shape=[224, 224])" if self.fixed_input_shape is not None: input_shape = [ None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] ] - image = fluid.data( - dtype='float32', shape=input_shape, name='image') + image = fluid.data(dtype='float32', shape=input_shape, name='image') else: image = fluid.data( dtype='float32', shape=[None, 3, None, None], name='image') @@ -81,7 +82,8 @@ class BaseClassifier(BaseAPI): del outputs['loss'] return inputs, outputs - def default_optimizer(self, learning_rate, lr_decay_epochs, lr_decay_gamma, + def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr, + lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [ @@ -90,6 +92,24 @@ class BaseClassifier(BaseAPI): ] lr_decay = fluid.layers.piecewise_decay( boundaries=boundaries, values=values) + if warmup_steps > 0: + if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". + format(lr_decay_epochs[0] * num_steps_each_epoch, + warmup_steps // num_steps_each_epoch)) + + lr_decay = fluid.layers.linear_lr_warmup( + learning_rate=lr_decay, + warmup_steps=warmup_steps, + start_lr=warmup_start_lr, + end_lr=learning_rate) optimizer = fluid.optimizer.Momentum( lr_decay, momentum=0.9, @@ -107,6 +127,8 @@ class BaseClassifier(BaseAPI): pretrain_weights='IMAGENET', optimizer=None, learning_rate=0.025, + warmup_steps=0, + warmup_start_lr=0.0, lr_decay_epochs=[30, 60, 90], lr_decay_gamma=0.1, use_vdl=False, @@ -129,6 +151,8 @@ class BaseClassifier(BaseAPI): optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的初始学习率。默认为0.025。 + warmup_steps(int): 学习率从warmup_start_lr上升至设定的learning_rate,所需的步数,默认为0 + warmup_start_lr(float): 学习率在warmup阶段时的起始值,默认为0.0 lr_decay_epochs (list): 默认优化器的学习率衰减轮数。默认为[30, 60, 90]。 lr_decay_gamma (float): 默认优化器的学习率衰减率。默认为0.1。 use_vdl (bool): 是否使用VisualDL进行可视化。默认值为False。 @@ -149,6 +173,8 @@ class BaseClassifier(BaseAPI): num_steps_each_epoch = train_dataset.num_samples // train_batch_size optimizer = self.default_optimizer( learning_rate=learning_rate, + warmup_steps=warmup_steps, + warmup_start_lr=warmup_start_lr, lr_decay_epochs=lr_decay_epochs, lr_decay_gamma=lr_decay_gamma, num_steps_each_epoch=num_steps_each_epoch) @@ -193,8 +219,7 @@ class BaseClassifier(BaseAPI): tuple (metrics, eval_details): 当return_details为True时,增加返回dict, 包含关键字:'true_labels'、'pred_scores',分别代表真实类别id、每个类别的预测得分。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') data_generator = eval_dataset.generator( batch_size=batch_size, drop_last=False) k = min(5, self.num_classes) @@ -206,9 +231,8 @@ class BaseClassifier(BaseAPI): self.test_prog).with_data_parallel( share_vars_from=self.parallel_train_prog) batch_size_each_gpu = self._get_single_card_bs(batch_size) - logging.info( - "Start to evaluating(total_samples={}, 
total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') @@ -218,15 +242,14 @@ class BaseClassifier(BaseAPI): num_pad_samples = batch_size - num_samples pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) images = np.concatenate([images, pad_images]) - outputs = self.exe.run( - self.parallel_test_prog, - feed={'image': images}, - fetch_list=list(self.test_outputs.values())) + outputs = self.exe.run(self.parallel_test_prog, + feed={'image': images}, + fetch_list=list(self.test_outputs.values())) outputs = [outputs[0][:num_samples]] true_labels.extend(labels) pred_scores.extend(outputs[0].tolist()) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) pred_top1_label = np.argsort(pred_scores)[:, -1] pred_topk_label = np.argsort(pred_scores)[:, -k:] @@ -263,10 +286,10 @@ class BaseClassifier(BaseAPI): self.arrange_transforms( transforms=self.test_transforms, mode='test') im = self.test_transforms(img_file) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) + result = self.exe.run(self.test_prog, + feed={'image': im}, + fetch_list=list(self.test_outputs.values()), + use_program_cache=True) pred_label = np.argsort(result[0][0])[::-1][:true_topk] res = [{ 'category_id': l, @@ -400,3 +423,16 @@ class ShuffleNetV2(BaseClassifier): def __init__(self, num_classes=1000): super(ShuffleNetV2, self).__init__( model_name='ShuffleNetV2', num_classes=num_classes) + + +class HRNet_W18(BaseClassifier): + def __init__(self, num_classes=1000): + super(HRNet_W18, self).__init__( + model_name='HRNet_W18', num_classes=num_classes) + + +class AlexNet(BaseClassifier): + def __init__(self, num_classes=1000, input_shape=None): + super(AlexNet, self).__init__( + model_name='AlexNet', num_classes=num_classes) + self.fixed_input_shape = input_shape diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index a253aa5d1d8c005c7903b57a9b9b36da45982d78..e548439a7ed81fd5758395244d26926d3c8010fe 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
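
A note on the warmup parameters threaded through BaseClassifier above: fluid.layers.linear_lr_warmup wraps the piecewise_decay schedule, so warmup must hand off before the first decay boundary, which is exactly the condition the new logging.error calls enforce. A minimal, self-contained sketch of that bound (the dataset size, batch size, and warmup_steps here are made-up illustration values, not taken from this patch):

    # Hypothetical sizes, for illustration only.
    num_samples_in_train_dataset = 120000
    train_batch_size = 64
    lr_decay_epochs = [30, 60, 90]
    warmup_steps = 2000

    num_steps_each_epoch = num_samples_in_train_dataset // train_batch_size
    first_boundary = lr_decay_epochs[0] * num_steps_each_epoch  # 56250 steps
    # This is the condition checked in default_optimizer above; violating it
    # triggers the logging.error() messages added in this patch.
    assert warmup_steps <= first_boundary
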
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import os.path as osp @@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -247,14 +242,16 @@ class DeepLabv3p(BaseAPI): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 save_dir (str): 模型保存路径。默认'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为字符串'CITYSCAPES', + 则自动下载在CITYSCAPES数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用 fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 learning_rate (float): 默认优化器的初始学习率。默认0.01。 lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', - 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 @@ -336,18 +333,27 @@ class DeepLabv3p(BaseAPI): for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]) - labels = np.array([d[1] for d in data]) + + _, _, im_h, im_w = images.shape + labels = list() + for d in data: + padding_label = np.zeros( + (1, im_h, im_w)).astype('int64') + self.ignore_index + _, label_h, label_w = d[1].shape + padding_label[:, :label_h, :label_w] = d[1] + labels.append(padding_label) + labels = np.array(labels) + num_samples = images.shape[0] if num_samples < batch_size: num_pad_samples = batch_size - num_samples pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) images = np.concatenate([images, pad_images]) feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) + outputs = self.exe.run(self.parallel_test_prog, + feed=feed_data, + fetch_list=list(self.test_outputs.values()), + return_numpy=True) pred = outputs[0] if num_samples < batch_size: pred = pred[0:num_samples] @@ -364,8 +370,7 @@ class DeepLabv3p(BaseAPI): metrics = OrderedDict( zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], - [miou, category_iou, macc, category_acc, - conf_mat.kappa()])) + [miou, category_iou, macc, category_acc, conf_mat.kappa()])) if return_details: eval_details = { 'confusion_matrix': conf_mat.confusion_matrix.tolist() @@ -394,10 +399,10 @@ class DeepLabv3p(BaseAPI): transforms=self.test_transforms, mode='test') im, im_info = self.test_transforms(im_file) im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) + 
result = self.exe.run(self.test_prog, + feed={'image': im}, + fetch_list=list(self.test_outputs.values()), + use_program_cache=True) pred = result[0] pred = np.squeeze(pred).astype('uint8') logit = result[1] @@ -413,6 +418,6 @@ class DeepLabv3p(BaseAPI): pred = pred[0:h, 0:w] logit = logit[0:h, 0:w, :] else: - raise Exception("Unexpected info '{}' in im_info".format( - info[0])) + raise Exception("Unexpected info '{}' in im_info".format(info[ + 0])) return {'label_map': pred, 'score_map': logit} diff --git a/paddlex/cv/models/fast_scnn.py b/paddlex/cv/models/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..5f66e4df6ede1b48c0363b5b8a496b23021454ef --- /dev/null +++ b/paddlex/cv/models/fast_scnn.py @@ -0,0 +1,169 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +import paddle.fluid as fluid +import paddlex +from collections import OrderedDict +from .deeplabv3p import DeepLabv3p + + +class FastSCNN(DeepLabv3p): + """实现Fast SCNN网络的构建并进行训练、评估、预测和模型导出。 + + Args: + num_classes (int): 类别数。 + use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 + use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 + 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 + class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 + num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 + 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1, + 即平时使用的交叉熵损失函数。 + ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。 + multi_loss_weight (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。 + 也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列, + fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重, + lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。 + + Raises: + ValueError: use_bce_loss或use_dice_loss为真且num_classes > 2。 + ValueError: class_weight为list, 但长度不等于num_classes。 + class_weight为str, 但class_weight.lower()不等于dynamic。 + TypeError: class_weight不为None时,其类型不是list或str。 + TypeError: multi_loss_weight不为list。 + ValueError: multi_loss_weight为list但长度小于1或者大于3。 + """ + + def __init__(self, + num_classes=2, + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255, + multi_loss_weight=[1.0]): + self.init_params = locals() + super(DeepLabv3p, self).__init__('segmenter') + # dice_loss或bce_loss只适用两类分割中 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss is only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is string, must be dynamic!") + else: + raise TypeError( + 'Expect class_weight is a list or string but receive {}'. + format(type(class_weight))) + + if not isinstance(multi_loss_weight, list): + raise TypeError( + 'Expect multi_loss_weight is a list but receive {}'.format( + type(multi_loss_weight))) + if len(multi_loss_weight) > 3 or len(multi_loss_weight) < 1: + raise ValueError( + "Length of multi_loss_weight should be lower than or equal to 3 but greater than 0." + ) + + self.num_classes = num_classes + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.multi_loss_weight = multi_loss_weight + self.ignore_index = ignore_index + self.labels = None + self.fixed_input_shape = None + + def build_net(self, mode='train'): + model = paddlex.cv.nets.segmentation.FastSCNN( + self.num_classes, + mode=mode, + use_bce_loss=self.use_bce_loss, + use_dice_loss=self.use_dice_loss, + class_weight=self.class_weight, + ignore_index=self.ignore_index, + multi_loss_weight=self.multi_loss_weight, + fixed_input_shape=self.fixed_input_shape) + inputs = model.generate_inputs() + model_out = model.build_net(inputs) + outputs = OrderedDict() + if mode == 'train': + self.optimizer.minimize(model_out) + outputs['loss'] = model_out + else: + outputs['pred'] = model_out[0] + outputs['logit'] = model_out[1] + return inputs, outputs + + def train(self, + num_epochs, + train_dataset, + train_batch_size=2, + eval_dataset=None, + save_interval_epochs=1, + log_interval_steps=2, + save_dir='output', + pretrain_weights='CITYSCAPES', + optimizer=None, + learning_rate=0.01, + lr_decay_power=0.9, + use_vdl=False, + sensitivities_file=None, + eval_metric_loss=0.05, + early_stop=False, + early_stop_patience=5, + resume_checkpoint=None): + """训练。 + + Args: + num_epochs (int): 训练迭代轮数。 + train_dataset (paddlex.datasets): 训练数据读取器。 + train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 + eval_dataset (paddlex.datasets): 评估数据读取器。 + save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。 + log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 + save_dir (str): 模型保存路径。默认'output'。 + pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES', + 则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'CITYSCAPES'。 + optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用 + fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 + learning_rate (float): 默认优化器的初始学习率。默认0.01。 + lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 + use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 + sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 + early_stop (bool): 是否使用提前终止训练策略。默认值为False。 + early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 + 连续下降或持平,则终止训练。默认值为5。 + resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + + Raises: + ValueError: 模型从inference model进行加载。 + """ + return super(FastSCNN, self).train( + num_epochs, train_dataset, train_batch_size, eval_dataset, + save_interval_epochs, log_interval_steps, save_dir, + pretrain_weights, optimizer, learning_rate, lr_decay_power, + use_vdl, sensitivities_file, eval_metric_loss, early_stop, + early_stop_patience, resume_checkpoint) diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py index 3b7144f3e9eeeb656940cc480637c6e871fcc210..45279bfc6014329ced089d39072221ceaf8dd683 100644 --- a/paddlex/cv/models/faster_rcnn.py +++ b/paddlex/cv/models/faster_rcnn.py @@ -1,16 +1,16 @@ 
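
The FastSCNN class added above is driven like the other PaddleX segmenters. A hedged usage sketch; it assumes the class is re-exported as paddlex.seg.FastSCNN alongside UNet and DeepLabv3p, and the dataset paths are placeholders:

    import paddlex as pdx
    from paddlex.seg import transforms

    train_transforms = transforms.Compose(
        [transforms.RandomHorizontalFlip(), transforms.Normalize()])
    train_dataset = pdx.datasets.SegDataset(
        data_dir='mydata',
        file_list='mydata/train_list.txt',
        label_list='mydata/labels.txt',
        transforms=train_transforms)

    # multi_loss_weight is ordered [fusion, higher(detail), lower(context)]
    # per the docstring above; the default [1.0] supervises only the fused branch.
    model = pdx.seg.FastSCNN(num_classes=2, multi_loss_weight=[1.0, 0.4, 0.4])
    model.train(
        num_epochs=40,
        train_dataset=train_dataset,
        train_batch_size=2,
        pretrain_weights='CITYSCAPES',
        save_dir='output/fast_scnn')
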
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -32,7 +32,7 @@ class FasterRCNN(BaseAPI): Args: num_classes (int): 包含了背景类的类别数。默认为81。 backbone (str): FasterRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', - 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 + 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 with_fpn (bool): 是否使用FPN结构。默认为True。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -47,7 +47,8 @@ class FasterRCNN(BaseAPI): self.init_params = locals() super(FasterRCNN, self).__init__('detector') backbones = [ - 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' + 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', + 'HRNet_W18' ] assert backbone in backbones, "backbone should be one of {}".format( backbones) @@ -79,6 +80,12 @@ class FasterRCNN(BaseAPI): layers = 101 variant = 'd' norm_type = 'affine_channel' + elif backbone_name == 'HRNet_W18': + backbone = paddlex.cv.nets.hrnet.HRNet( + width=18, freeze_norm=True, norm_decay=0., freeze_at=0) + if self.with_fpn is False: + self.with_fpn = True + return backbone if self.with_fpn: backbone = paddlex.cv.nets.resnet.ResNet( norm_type='bn' if norm_type is None else norm_type, @@ -117,12 +124,12 @@ class FasterRCNN(BaseAPI): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -131,8 +138,16 @@ class FasterRCNN(BaseAPI): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_steps should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= 
lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". + format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -181,7 +196,8 @@ class FasterRCNN(BaseAPI): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的初始学习率。默认为0.0025。 @@ -227,7 +243,9 @@ class FasterRCNN(BaseAPI): # 构建训练、验证、测试网络 self.build_program() fuse_bn = True - if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: + if self.with_fpn and self.backbone in [ + 'ResNet18', 'ResNet50', 'HRNet_W18' + ]: fuse_bn = False self.net_initialize( startup_prog=fluid.default_startup_program(), @@ -273,8 +291,7 @@ class FasterRCNN(BaseAPI): eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -293,14 +310,12 @@ class FasterRCNN(BaseAPI): logging.warning( "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1." ) - dataset = eval_dataset.generator( - batch_size=batch_size, drop_last=False) + dataset = eval_dataset.generator(batch_size=batch_size, drop_last=False) total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) results = list() - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". 
+ format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') im_infos = np.array([d[1] for d in data]).astype('float32') @@ -310,11 +325,10 @@ class FasterRCNN(BaseAPI): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) @@ -339,13 +353,13 @@ class FasterRCNN(BaseAPI): res['is_difficult'] = (np.array(res_is_difficult), [res_is_difficult_lod]) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=True) metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return metrics, eval_details return metrics @@ -359,7 +373,8 @@ class FasterRCNN(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -373,15 +388,15 @@ class FasterRCNN(BaseAPI): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False, + use_program_cache=True) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) diff --git a/paddlex/cv/models/hrnet.py b/paddlex/cv/models/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..3a000feee5fe6a2b6a93662e1dc65754d6e1cd68 --- /dev/null +++ b/paddlex/cv/models/hrnet.py @@ -0,0 +1,173 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
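
The faster_rcnn.py hunks above add 'HRNet_W18' as a backbone option; note that the backbone-construction branch force-enables FPN for it. A hedged construction sketch (the class is assumed to be exported as paddlex.det.FasterRCNN, as in PaddleX 1.x):

    import paddlex as pdx

    # num_classes includes the background class, per the docstring above.
    model = pdx.det.FasterRCNN(num_classes=81, backbone='HRNet_W18')
    # Even if with_fpn=False were passed, the HRNet_W18 branch above flips
    # self.with_fpn back to True when the backbone is built, and fuse_bn is
    # disabled for this backbone during initialization.
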
+ +from __future__ import absolute_import +import paddle.fluid as fluid +import paddlex +from collections import OrderedDict +from .deeplabv3p import DeepLabv3p + + +class HRNet(DeepLabv3p): + """实现HRNet网络的构建并进行训练、评估、预测和模型导出。 + + Args: + num_classes (int): 类别数。 + width (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 + use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 + use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 + 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 + class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 + num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 + 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1, + 即平时使用的交叉熵损失函数。 + ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。 + + Raises: + ValueError: use_bce_loss或use_dice_loss为真且num_classes > 2。 + ValueError: class_weight为list, 但长度不等于num_classes。 + class_weight为str, 但class_weight.lower()不等于dynamic。 + TypeError: class_weight不为None时,其类型不是list或str。 + """ + + def __init__(self, + num_classes=2, + width=18, + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255): + self.init_params = locals() + super(DeepLabv3p, self).__init__('segmenter') + # dice_loss或bce_loss只适用两类分割中 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss is only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is string, must be dynamic!") + else: + raise TypeError( + 'Expect class_weight is a list or string but receive {}'. + format(type(class_weight))) + self.num_classes = num_classes + self.width = width + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.labels = None + self.fixed_input_shape = None + + def build_net(self, mode='train'): + model = paddlex.cv.nets.segmentation.HRNet( + self.num_classes, + width=self.width, + mode=mode, + use_bce_loss=self.use_bce_loss, + use_dice_loss=self.use_dice_loss, + class_weight=self.class_weight, + ignore_index=self.ignore_index, + fixed_input_shape=self.fixed_input_shape) + inputs = model.generate_inputs() + model_out = model.build_net(inputs) + outputs = OrderedDict() + if mode == 'train': + self.optimizer.minimize(model_out) + outputs['loss'] = model_out + else: + outputs['pred'] = model_out[0] + outputs['logit'] = model_out[1] + return inputs, outputs + + def default_optimizer(self, + learning_rate, + num_epochs, + num_steps_each_epoch, + lr_decay_power=0.9): + decay_step = num_epochs * num_steps_each_epoch + lr_decay = fluid.layers.polynomial_decay( + learning_rate, + decay_step, + end_learning_rate=0, + power=lr_decay_power) + optimizer = fluid.optimizer.Momentum( + lr_decay, + momentum=0.9, + regularization=fluid.regularizer.L2Decay( + regularization_coeff=5e-04)) + return optimizer + + def train(self, + num_epochs, + train_dataset, + train_batch_size=2, + eval_dataset=None, + save_interval_epochs=1, + log_interval_steps=2, + save_dir='output', + pretrain_weights='IMAGENET', + optimizer=None, + learning_rate=0.01, + lr_decay_power=0.9, + use_vdl=False, + sensitivities_file=None, + eval_metric_loss=0.05, + early_stop=False, + early_stop_patience=5, + resume_checkpoint=None): + """训练。 + + Args: + num_epochs (int): 训练迭代轮数。 + train_dataset (paddlex.datasets): 训练数据读取器。 + train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 + eval_dataset (paddlex.datasets): 评估数据读取器。 + save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。 + log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 + save_dir (str): 模型保存路径。默认'output'。 + pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', + 则自动下载在IMAGENET图片数据上预训练的模型权重;若为字符串'CITYSCAPES', + 则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用 + fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 + learning_rate (float): 默认优化器的初始学习率。默认0.01。 + lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 + use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 + sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 + early_stop (bool): 是否使用提前终止训练策略。默认值为False。 + early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 + 连续下降或持平,则终止训练。默认值为5。 + resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。 + + Raises: + ValueError: 模型从inference model进行加载。 + """ + return super(HRNet, self).train( + num_epochs, train_dataset, train_batch_size, eval_dataset, + save_interval_epochs, log_interval_steps, save_dir, + pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl, + sensitivities_file, eval_metric_loss, early_stop, + early_stop_patience, resume_checkpoint) diff --git a/paddlex/cv/models/load_model.py b/paddlex/cv/models/load_model.py index 738f4ff00452d278b3988d9303bb15b0d8885979..87b30ac47c206f0b3723ffcf353d95078feeb892 100644 --- a/paddlex/cv/models/load_model.py +++ b/paddlex/cv/models/load_model.py @@ -41,7 +41,16 @@ def load_model(model_dir, 
fixed_input_shape=None): if 'model_name' in info['_init_params']: del info['_init_params']['model_name'] model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params']) + model.fixed_input_shape = fixed_input_shape + if '_Attributes' in info: + if 'fixed_input_shape' in info['_Attributes']: + fixed_input_shape = info['_Attributes']['fixed_input_shape'] + if fixed_input_shape is not None: + logging.info("Model already has fixed_input_shape with {}". + format(fixed_input_shape)) + model.fixed_input_shape = fixed_input_shape + if status == "Normal" or \ status == "Prune" or status == "fluid.save": startup_prog = fluid.Program() @@ -88,8 +97,8 @@ def load_model(model_dir, fixed_input_shape=None): model.model_type, info['Transforms'], info['BatchTransforms']) model.eval_transforms = copy.deepcopy(model.test_transforms) else: - model.test_transforms = build_transforms( - model.model_type, info['Transforms'], to_rgb) + model.test_transforms = build_transforms(model.model_type, + info['Transforms'], to_rgb) model.eval_transforms = copy.deepcopy(model.test_transforms) if '_Attributes' in info: @@ -107,20 +116,7 @@ def fix_input_shape(info, fixed_input_shape=None): resize = {'ResizeByShort': {}} padding = {'Padding': {}} if info['_Attributes']['model_type'] == 'classifier': - crop_size = 0 - for transform in info['Transforms']: - if 'CenterCrop' in transform: - crop_size = transform['CenterCrop']['crop_size'] - break - assert crop_size == fixed_input_shape[ - 0], "fixed_input_shape must == CenterCrop:crop_size:{}".format( - crop_size) - assert crop_size == fixed_input_shape[ - 1], "fixed_input_shape must == CenterCrop:crop_size:{}".format( - crop_size) - if crop_size == 0: - logging.warning( - "fixed_input_shape must == input shape when trainning") + pass else: resize['ResizeByShort']['short_size'] = min(fixed_input_shape) resize['ResizeByShort']['max_size'] = max(fixed_input_shape) diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index ba5da33d8b2a660cf23a1d57b9bf97a312e29002..26d5e5cb4edc58be0fffaf6d778058c5846c1929 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
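
The load_model change above gives a fixed_input_shape recorded in the saved model's '_Attributes' precedence over the shape requested by the caller. A hedged sketch of the calling convention (the path and shape are placeholders):

    import paddlex as pdx

    # Ask for a graph fixed to 608x608 inputs, e.g. ahead of deployment export.
    model = pdx.load_model('output/hrnet/best_model',
                           fixed_input_shape=[608, 608])
    # If the checkpoint already carries a fixed_input_shape in _Attributes,
    # load_model keeps the stored shape and logs
    # "Model already has fixed_input_shape with ..." instead of using ours.
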
from __future__ import absolute_import import math @@ -32,7 +32,7 @@ class MaskRCNN(FasterRCNN): Args: num_classes (int): 包含了背景类的类别数。默认为81。 backbone (str): MaskRCNN的backbone网络,取值范围为['ResNet18', 'ResNet50', - 'ResNet50_vd', 'ResNet101', 'ResNet101_vd']。默认为'ResNet50'。 + 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', 'HRNet_W18']。默认为'ResNet50'。 with_fpn (bool): 是否使用FPN结构。默认为True。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 @@ -46,7 +46,8 @@ class MaskRCNN(FasterRCNN): anchor_sizes=[32, 64, 128, 256, 512]): self.init_params = locals() backbones = [ - 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd' + 'ResNet18', 'ResNet50', 'ResNet50_vd', 'ResNet101', 'ResNet101_vd', + 'HRNet_W18' ] assert backbone in backbones, "backbone should be one of {}".format( backbones) @@ -81,13 +82,13 @@ class MaskRCNN(FasterRCNN): model_out = model.build_net(inputs) loss = model_out['loss'] self.optimizer.minimize(loss) - outputs = OrderedDict([('loss', model_out['loss']), - ('loss_cls', model_out['loss_cls']), - ('loss_bbox', model_out['loss_bbox']), - ('loss_mask', model_out['loss_mask']), - ('loss_rpn_cls', model_out['loss_rpn_cls']), - ('loss_rpn_bbox', - model_out['loss_rpn_bbox'])]) + outputs = OrderedDict( + [('loss', model_out['loss']), + ('loss_cls', model_out['loss_cls']), + ('loss_bbox', model_out['loss_bbox']), + ('loss_mask', model_out['loss_mask']), + ('loss_rpn_cls', model_out['loss_rpn_cls']), ( + 'loss_rpn_bbox', model_out['loss_rpn_bbox'])]) else: outputs = model.build_net(inputs) return inputs, outputs @@ -96,8 +97,16 @@ class MaskRCNN(FasterRCNN): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_step should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_samples_in_train_dataset", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should less than {} or lr_decay_epochs[0] greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in train function". 
+ format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -146,7 +155,8 @@ class MaskRCNN(FasterRCNN): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的学习率。默认为1.0/800。 @@ -194,7 +204,9 @@ class MaskRCNN(FasterRCNN): # 构建训练、验证、测试网络 self.build_program() fuse_bn = True - if self.with_fpn and self.backbone in ['ResNet18', 'ResNet50']: + if self.with_fpn and self.backbone in [ + 'ResNet18', 'ResNet50', 'HRNet_W18' + ]: fuse_bn = False self.net_initialize( startup_prog=fluid.default_startup_program(), @@ -241,8 +253,7 @@ class MaskRCNN(FasterRCNN): 预测框坐标、预测框得分;'mask',对应元素预测区域结果列表,每个预测结果由图像id、 预测区域类别id、预测区域坐标、预测区域得分;’gt‘:真实标注框和标注区域相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -263,9 +274,8 @@ class MaskRCNN(FasterRCNN): total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) results = list() - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start to evaluating(total_samples={}, total_steps={})...". 
+ format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]).astype('float32') @@ -276,11 +286,10 @@ class MaskRCNN(FasterRCNN): 'im_info': im_infos, 'im_shape': im_shapes, } - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()), @@ -292,8 +301,8 @@ class MaskRCNN(FasterRCNN): res['im_shape'] = (im_shapes, []) res['im_id'] = (np.array(res_im_id), []) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) ap_stats, eval_details = eval_results( results, @@ -302,17 +311,16 @@ class MaskRCNN(FasterRCNN): with_background=True, resolution=self.mask_head_resolution) if metric == 'VOC': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_map', 'segm_map'], [ap_stats[0][1], ap_stats[1][1]])) else: - metrics = OrderedDict( - zip(['bbox_map', 'segm_map'], [0.0, 0.0])) + metrics = OrderedDict(zip(['bbox_map', 'segm_map'], [0.0, 0.0])) elif metric == 'COCO': - if isinstance(ap_stats[0], np.ndarray) and isinstance( - ap_stats[1], np.ndarray): + if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1], + np.ndarray): metrics = OrderedDict( zip(['bbox_mmap', 'segm_mmap'], [ap_stats[0][0], ap_stats[1][0]])) @@ -331,8 +339,10 @@ class MaskRCNN(FasterRCNN): transforms (paddlex.det.transforms): 数据预处理操作。 Returns: - dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、 - 预测框得分组成。 + dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、 + 预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 原图大小的预测二值图(1表示预测框类别,0表示背景类)、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -346,15 +356,15 @@ class MaskRCNN(FasterRCNN): im = np.expand_dims(im, axis=0) im_resize_info = np.expand_dims(im_resize_info, axis=0) im_shape = np.expand_dims(im_shape, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_info': im_resize_info, - 'im_shape': im_shape - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={ + 'image': im, + 'im_info': im_resize_info, + 'im_shape': im_shape + }, + fetch_list=list(self.test_outputs.values()), + return_numpy=False, + use_program_cache=True) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) @@ -368,8 +378,8 @@ class MaskRCNN(FasterRCNN): import pycocotools.mask as mask_util for index, xywh_res in enumerate(xywh_results): del xywh_res['image_id'] - xywh_res['mask'] = mask_util.decode( - segm_results[index]['segmentation']) + xywh_res['mask'] = mask_util.decode(segm_results[index][ + 'segmentation']) xywh_res['category'] = self.labels[xywh_res['category_id']] results.append(xywh_res) return results diff --git a/paddlex/cv/models/slim/prune.py b/paddlex/cv/models/slim/prune.py index 810679d3d7cf70a14922a594af3468294f12d29c..ad4dec23b8e3b29eda30fa873f4baa625a004884 100644 --- 
a/paddlex/cv/models/slim/prune.py +++ b/paddlex/cv/models/slim/prune.py @@ -66,16 +66,15 @@ def sensitivity(program, progress = "%.2f%%" % (progress * 100) logging.info( "Total evaluate iters={}, current={}, progress={}, eta={}". - format( - total_evaluate_iters, current_iter, progress, - seconds_to_hms( - int(cost * (total_evaluate_iters - current_iter)))), + format(total_evaluate_iters, current_iter, progress, + seconds_to_hms( + int(cost * (total_evaluate_iters - current_iter)))), use_color=True) current_iter += 1 pruner = Pruner() - logging.info("sensitive - param: {}; ratios: {}".format( - name, ratio)) + logging.info("sensitive - param: {}; ratios: {}".format(name, + ratio)) pruned_program, param_backup, _ = pruner.prune( program=graph.program, scope=scope, @@ -87,8 +86,8 @@ def sensitivity(program, param_backup=True) pruned_metric = eval_func(pruned_program) loss = (baseline - pruned_metric) / baseline - logging.info("pruned param: {}; {}; loss={}".format( - name, ratio, loss)) + logging.info("pruned param: {}; {}; loss={}".format(name, ratio, + loss)) sensitivities[name][ratio] = loss @@ -116,6 +115,21 @@ def channel_prune(program, prune_names, prune_ratios, place, only_graph=False): Returns: paddle.fluid.Program: 裁剪后的Program。 """ + prog_var_shape_dict = {} + for var in program.list_vars(): + try: + prog_var_shape_dict[var.name] = var.shape + except Exception: + pass + index = 0 + for param, ratio in zip(prune_names, prune_ratios): + origin_num = prog_var_shape_dict[param][0] + pruned_num = int(round(origin_num * ratio)) + while origin_num == pruned_num: + ratio -= 0.1 + pruned_num = int(round(origin_num * (ratio))) + prune_ratios[index] = ratio + index += 1 scope = fluid.global_scope() pruner = Pruner() program, _, _ = pruner.prune( @@ -221,6 +235,9 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8): 其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。 """ + if os.path.exists(save_file): + os.remove(save_file) + prune_names = get_prune_params(model) def eval_for_prune(program): @@ -284,6 +301,19 @@ def cal_model_size(program, place, sensitivities_file, eval_metric_loss=0.05): """ prune_params_ratios = get_params_ratios(sensitivities_file, eval_metric_loss) + prog_var_shape_dict = {} + for var in program.list_vars(): + try: + prog_var_shape_dict[var.name] = var.shape + except Exception: + pass + for param, ratio in prune_params_ratios.items(): + origin_num = prog_var_shape_dict[param][0] + pruned_num = int(round(origin_num * ratio)) + while origin_num == pruned_num: + ratio -= 0.1 + pruned_num = int(round(origin_num * (ratio))) + prune_params_ratios[param] = ratio prune_program = channel_prune( program, list(prune_params_ratios.keys()), diff --git a/paddlex/cv/models/slim/prune_config.py b/paddlex/cv/models/slim/prune_config.py index 34675a53b8bda539f3aa5cfc0adf374f449df303..49430e9bfb1dcc47fb93aa9fc7d05ceb21e2b9e8 100644 --- a/paddlex/cv/models/slim/prune_config.py +++ b/paddlex/cv/models/slim/prune_config.py @@ -142,13 +142,16 @@ def get_prune_params(model): program = model.test_prog if model_type.startswith('ResNet') or \ model_type.startswith('DenseNet') or \ - model_type.startswith('DarkNet'): + model_type.startswith('DarkNet') or \ + model_type.startswith('AlexNet'): for block in program.blocks: for param in block.all_parameters(): pd_var = fluid.global_scope().find_var(param.name) pd_param = pd_var.get_tensor() if len(np.array(pd_param).shape) == 4: prune_names.append(param.name) + if model_type == 'AlexNet': + 
prune_names.remove('conv5_weights') elif model_type == "MobileNetV1": prune_names.append("conv1_weights") for param in program.global_block().all_parameters(): @@ -162,7 +165,7 @@ def get_prune_params(model): continue prune_names.append(param.name) elif model_type.startswith("MobileNetV3"): - if model_type == 'MobileNetV3_small': + if model_type.startswith('MobileNetV3_small'): expand_prune_id = [3, 4] else: expand_prune_id = [2, 3, 4, 8, 9, 11] diff --git a/paddlex/cv/models/unet.py b/paddlex/cv/models/unet.py index d7bf80ed27898f65c059ad3febde4885b4e58a9f..34c597b0e190122c3ba80c485378273abff20b65 100644 --- a/paddlex/cv/models/unet.py +++ b/paddlex/cv/models/unet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import paddlex @@ -95,11 +95,6 @@ class UNet(DeepLabv3p): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -141,7 +136,7 @@ class UNet(DeepLabv3p): lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', - 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 diff --git a/paddlex/cv/models/utils/detection_eval.py b/paddlex/cv/models/utils/detection_eval.py index b9dcdaa029265483c2b9fb919426686c36a411f5..d2c0ae8abf867baddfc767bd6e1a73cf5d36ea3d 100644 --- a/paddlex/cv/models/utils/detection_eval.py +++ b/paddlex/cv/models/utils/detection_eval.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
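
The guard added to channel_prune and cal_model_size above keeps a prune ratio from rounding up to "remove every filter in the layer". A self-contained sketch of the same loop:

    def clip_prune_ratio(origin_num, ratio):
        # Mirrors the while-loop above: back the ratio off by 0.1 until at
        # least one of the origin_num filters survives rounding.
        pruned_num = int(round(origin_num * ratio))
        while origin_num == pruned_num:
            ratio -= 0.1
            pruned_num = int(round(origin_num * ratio))
        return ratio

    # With only 2 filters, ratios 0.9 and 0.8 both round to pruning 2 of 2,
    # so the guard steps down to roughly 0.7, which prunes just one filter.
    print(clip_prune_ratio(2, 0.9))
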
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import diff --git a/paddlex/cv/models/utils/pretrain_weights.py b/paddlex/cv/models/utils/pretrain_weights.py index 81790a20144d8c255601b8a778eebf02c409c55d..af8a6aa2af452914462bb305e6a03fadc7f2836c 100644 --- a/paddlex/cv/models/utils/pretrain_weights.py +++ b/paddlex/cv/models/utils/pretrain_weights.py @@ -1,4 +1,5 @@ import paddlex +import paddlex.utils.logging as logging import paddlehub as hub import os import os.path as osp @@ -56,19 +57,120 @@ image_pretrain = { 'https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar', 'ShuffleNetV2': 'https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar', + 'HRNet_W18': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar', + 'HRNet_W30': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W30_C_pretrained.tar', + 'HRNet_W32': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W32_C_pretrained.tar', + 'HRNet_W40': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W40_C_pretrained.tar', + 'HRNet_W48': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W48_C_pretrained.tar', + 'HRNet_W60': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W60_C_pretrained.tar', + 'HRNet_W64': + 'https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W64_C_pretrained.tar', + 'AlexNet': + 'http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar' } coco_pretrain = { - 'UNet': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz' + 'YOLOv3_DarkNet53_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar', + 'YOLOv3_MobileNetV1_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar', + 'YOLOv3_MobileNetV3_large_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams', + 'YOLOv3_ResNet34_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar', + 'YOLOv3_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar', + 'FasterRCNN_ResNet50_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar', + 'FasterRCNN_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar', + 'FasterRCNN_ResNet101_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar', + 'FasterRCNN_ResNet101_vd_COCO': + 
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar', + 'FasterRCNN_HRNet_W18_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar', + 'MaskRCNN_ResNet50_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar', + 'MaskRCNN_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar', + 'MaskRCNN_ResNet101_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar', + 'MaskRCNN_ResNet101_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar', + 'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz', + 'DeepLabv3p_MobileNetV2_x1.0_COCO': + 'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz', + 'DeepLabv3p_Xception65_COCO': + 'https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz' +} + +cityscapes_pretrain = { + 'DeepLabv3p_MobileNetV2_x1.0_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz', + 'DeepLabv3p_Xception65_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz', + 'HRNet_W18_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz', + 'FastSCNN_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar' } -def get_pretrain_weights(flag, model_type, backbone, save_dir): +def get_pretrain_weights(flag, class_name, backbone, save_dir): if flag is None: return None elif osp.isdir(flag): return flag + elif osp.isfile(flag): + return flag + warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}" + if flag == 'COCO': + if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \ + class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \ + class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']: + model_name = '{}_{}'.format(class_name, backbone) + logging.warning(warning_info.format(model_name, flag, 'IMAGENET')) + flag = 'IMAGENET' + elif class_name == 'HRNet': + logging.warning(warning_info.format(class_name, flag, 'IMAGENET')) + flag = 'IMAGENET' + elif class_name == 'FastSCNN': + logging.warning( + warning_info.format(class_name, flag, 'CITYSCAPES')) + flag = 'CITYSCAPES' + elif flag == 'CITYSCAPES': + model_name = '{}_{}'.format(class_name, backbone) + if class_name == 'UNet': + logging.warning(warning_info.format(class_name, flag, 'COCO')) + flag = 'COCO' + if class_name == 'HRNet' and backbone.split('_')[ + -1] in ['W30', 'W32', 'W40', 'W48', 'W60', 'W64']: + logging.warning(warning_info.format(backbone, flag, 'IMAGENET')) + flag = 'IMAGENET' + if class_name == 'DeepLabv3p' and backbone in [ + 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', + 'MobileNetV2_x1.5', 'MobileNetV2_x2.0' + ]: + model_name = '{}_{}'.format(class_name, backbone) + logging.warning(warning_info.format(model_name, flag, 'IMAGENET')) + flag = 'IMAGENET' elif flag == 'IMAGENET': + if class_name == 'UNet': + logging.warning(warning_info.format(class_name, flag, 'COCO')) + flag = 'COCO' + elif class_name == 'FastSCNN': + logging.warning( + warning_info.format(class_name, flag, 'CITYSCAPES')) + flag = 'CITYSCAPES' + + if flag == 'IMAGENET': new_save_dir = save_dir if hasattr(paddlex, 'pretrain_dir'): new_save_dir = paddlex.pretrain_dir @@ -80,15 +182,17 @@ def 
get_pretrain_weights(flag, model_type, backbone, save_dir): backbone = 'MobileNetV3_small_x1_0_ssld' elif backbone == 'MobileNetV3_large_ssld': backbone = 'MobileNetV3_large_x1_0_ssld' - if model_type == 'detector': + if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']: if backbone == 'ResNet50': backbone = 'DetResNet50' assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format( backbone) - # url = image_pretrain[backbone] - # fname = osp.split(url)[-1].split('.')[0] - # paddlex.utils.download_and_decompress(url, path=new_save_dir) - # return osp.join(new_save_dir, fname) + + # if backbone == 'AlexNet': + # url = image_pretrain[backbone] + # fname = osp.split(url)[-1].split('.')[0] + # paddlex.utils.download_and_decompress(url, path=new_save_dir) + # return osp.join(new_save_dir, fname) try: hub.download(backbone, save_path=new_save_dir) except Exception as e: @@ -103,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir): raise Exception( "Unexpected error, please make sure paddlehub >= 1.6.2") return osp.join(new_save_dir, backbone) - elif flag == 'COCO': + elif flag in ['COCO', 'CITYSCAPES']: new_save_dir = save_dir if hasattr(paddlex, 'pretrain_dir'): new_save_dir = paddlex.pretrain_dir - url = coco_pretrain[backbone] + if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']: + backbone = '{}_{}'.format(class_name, backbone) + backbone = "{}_{}".format(backbone, flag) + if flag == 'COCO': + url = coco_pretrain[backbone] + elif flag == 'CITYSCAPES': + url = cityscapes_pretrain[backbone] fname = osp.split(url)[-1].split('.')[0] # paddlex.utils.download_and_decompress(url, path=new_save_dir) # return osp.join(new_save_dir, fname) - - assert backbone in coco_pretrain, "There is not COCO pretrain weights for {}, you may try ImageNet.".format( - backbone) try: hub.download(backbone, save_path=new_save_dir) except Exception as e: @@ -130,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir): return osp.join(new_save_dir, backbone) else: raise Exception( - "pretrain_weights need to be defined as directory path or `IMAGENET` or 'COCO' (download pretrain weights automatically)." + "pretrain_weights needs to be defined as a directory path or 'IMAGENET' or 'COCO' or 'CITYSCAPES' (download pretrain weights automatically)." ) diff --git a/paddlex/cv/models/utils/visualize.py b/paddlex/cv/models/utils/visualize.py index 6982bdf62993598ab8f0d42e09af2e303a7957bb..89875114f13e2b275019e3a65fc19576013dd68a 100644 --- a/paddlex/cv/models/utils/visualize.py +++ b/paddlex/cv/models/utils/visualize.py @@ -1,17 +1,18 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
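
The COCO/CITYSCAPES branch above builds the download key by prefixing the class name for the detectors and DeepLabv3p and then appending the flag. A self-contained sketch of that key construction:

    def pretrain_key(class_name, backbone, flag):
        # Same key construction as the elif branch above.
        if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']:
            backbone = '{}_{}'.format(class_name, backbone)
        return '{}_{}'.format(backbone, flag)

    # Resolves to 'FasterRCNN_HRNet_W18_COCO', one of the keys registered
    # in coco_pretrain above.
    print(pretrain_key('FasterRCNN', 'HRNet_W18', 'COCO'))
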
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- import os import cv2 import colorsys diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py index e1cd2d684f44a6cb93a0ac3415ccc30c653aa60e..85ee89fc86851ff9be104d0ee258eefce9843a69 100644 --- a/paddlex/cv/models/yolo_v3.py +++ b/paddlex/cv/models/yolo_v3.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -128,8 +128,16 @@ class YOLOv3(BaseAPI): lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch): if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch: - raise Exception("warmup_steps should less than {}".format( - lr_decay_epochs[0] * num_steps_each_epoch)) + logging.error( + "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0]*num_steps_each_epoch", + exit=False) + logging.error( + "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice", + exit=False) + logging.error( + "warmup_steps should be less than {} or lr_decay_epochs[0] should be greater than {}; please modify 'lr_decay_epochs' or 'warmup_steps' in the train() function".
+ format(lr_decay_epochs[0] * num_steps_each_epoch, warmup_steps + // num_steps_each_epoch)) boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs] values = [(lr_decay_gamma**i) * learning_rate for i in range(len(lr_decay_epochs) + 1)] @@ -180,7 +188,8 @@ class YOLOv3(BaseAPI): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的学习率。默认为1.0/8000。 @@ -277,8 +286,7 @@ class YOLOv3(BaseAPI): eval_details为dict,包含关键字:'bbox',对应元素预测结果列表,每个预测结果由图像id、 预测框类别id、预测框坐标、预测框得分;’gt‘:真实标注框相关信息。 """ - self.arrange_transforms( - transforms=eval_dataset.transforms, mode='eval') + self.arrange_transforms(transforms=eval_dataset.transforms, mode='eval') if metric is None: if hasattr(self, 'metric') and self.metric is not None: metric = self.metric @@ -298,19 +306,17 @@ class YOLOv3(BaseAPI): data_generator = eval_dataset.generator( batch_size=batch_size, drop_last=False) - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) + logging.info("Start evaluating (total_samples={}, total_steps={})...". + format(eval_dataset.num_samples, total_steps)) for step, data in tqdm.tqdm( enumerate(data_generator()), total=total_steps): images = np.array([d[0] for d in data]) im_sizes = np.array([d[1] for d in data]) feed_data = {'image': images, 'im_size': im_sizes} - outputs = self.exe.run( - self.test_prog, - feed=[feed_data], - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed=[feed_data], + fetch_list=list(self.test_outputs.values()), + return_numpy=False) res = { 'bbox': (np.array(outputs[0]), outputs[0].recursive_sequence_lengths()) } @@ -326,13 +332,13 @@ class YOLOv3(BaseAPI): res['gt_label'] = (res_gt_label, []) res['is_difficult'] = (res_is_difficult, []) results.append(res) - logging.debug("[EVAL] Epoch={}, Step={}/{}".format( - epoch_id, step + 1, total_steps)) + logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step + + 1, total_steps)) box_ap_stats, eval_details = eval_results( results, metric, eval_dataset.coco_gt, with_background=False) evaluate_metrics = OrderedDict( - zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'], - box_ap_stats)) + zip(['bbox_mmap' + if metric == 'COCO' else 'bbox_map'], box_ap_stats)) if return_details: return evaluate_metrics, eval_details return evaluate_metrics @@ -346,7 +352,8 @@ class YOLOv3(BaseAPI): Returns: list: 预测结果列表,每个预测结果由预测框类别标签、 - 预测框类别名称、预测框坐标、预测框得分组成。 + 预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、 + 预测框得分组成。 """ if transforms is None and not hasattr(self, 'test_transforms'): raise Exception("transforms need to be defined, now is None.") @@ -359,14 +366,12 @@ class YOLOv3(BaseAPI): im, im_size = self.test_transforms(img_file) im = np.expand_dims(im, axis=0) im_size = np.expand_dims(im_size, axis=0) - outputs = self.exe.run( - self.test_prog, - feed={ - 'image': im, - 'im_size': im_size - }, - fetch_list=list(self.test_outputs.values()), - return_numpy=False) + outputs = self.exe.run(self.test_prog, + feed={'image': im, + 'im_size': im_size}, + fetch_list=list(self.test_outputs.values()), + return_numpy=False, +
use_program_cache=True) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(list(self.test_outputs.keys()), outputs) diff --git a/paddlex/cv/nets/__init__.py b/paddlex/cv/nets/__init__.py index 5948c529493e55aadf7721ab9eb046adf89f3a72..5b427fe31be957f92611f7cfc6a9e6102a3c9616 100644 --- a/paddlex/cv/nets/__init__.py +++ b/paddlex/cv/nets/__init__.py @@ -20,9 +20,12 @@ from .mobilenet_v2 import MobileNetV2 from .mobilenet_v3 import MobileNetV3 from .segmentation import UNet from .segmentation import DeepLabv3p +from .segmentation import FastSCNN from .xception import Xception from .densenet import DenseNet from .shufflenet_v2 import ShuffleNetV2 +from .hrnet import HRNet +from .alexnet import AlexNet def resnet18(input, num_classes=1000): @@ -51,14 +54,20 @@ def resnet50_vd(input, num_classes=1000): def resnet50_vd_ssld(input, num_classes=1000): - model = ResNet(layers=50, num_classes=num_classes, - variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) + model = ResNet( + layers=50, + num_classes=num_classes, + variant='d', + lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) return model(input) def resnet101_vd_ssld(input, num_classes=1000): - model = ResNet(layers=101, num_classes=num_classes, - variant='d', lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) + model = ResNet( + layers=101, + num_classes=num_classes, + variant='d', + lr_mult_list=[1.0, 0.1, 0.2, 0.2, 0.3]) return model(input) @@ -93,14 +102,18 @@ def mobilenetv3_large(input, num_classes=1000): def mobilenetv3_small_ssld(input, num_classes=1000): - model = MobileNetV3(num_classes=num_classes, model_name='small', - lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) + model = MobileNetV3( + num_classes=num_classes, + model_name='small', + lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) return model(input) def mobilenetv3_large_ssld(input, num_classes=1000): - model = MobileNetV3(num_classes=num_classes, model_name='large', - lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) + model = MobileNetV3( + num_classes=num_classes, + model_name='large', + lr_mult_list=[0.25, 0.25, 0.5, 0.5, 0.75]) return model(input) @@ -133,6 +146,17 @@ def densenet201(input, num_classes=1000): model = DenseNet(layers=201, num_classes=num_classes) return model(input) + def shufflenetv2(input, num_classes=1000): model = ShuffleNetV2(num_classes=num_classes) return model(input) + + +def hrnet_w18(input, num_classes=1000): + model = HRNet(width=18, num_classes=num_classes) + return model(input) + + +def alexnet(input, num_classes=1000): + model = AlexNet(num_classes=num_classes) + return model(input) diff --git a/paddlex/cv/nets/alexnet.py b/paddlex/cv/nets/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6770f437d982428cd8d5ed7edb44e00915754139 --- /dev/null +++ b/paddlex/cv/nets/alexnet.py @@ -0,0 +1,170 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
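As a quick orientation, a hedged sketch of how the two backbone entry points just registered in paddlex/cv/nets/__init__.py might be exercised; the input shape and tensor names are illustrative, and nothing here is executed by the patch itself:

    import paddle.fluid as fluid
    from paddlex.cv.nets import hrnet_w18, alexnet

    # Both factories build a classification head over an image batch;
    # 3x224x224 is the conventional AlexNet input size.
    image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
    hrnet_logits = hrnet_w18(image, num_classes=1000)
    alexnet_logits = alexnet(image, num_classes=1000)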
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid + + +class AlexNet(): + def __init__(self, num_classes=1000): + assert num_classes is not None, "In AlexNet, num_classes cannot be None" + self.num_classes = num_classes + + def __call__(self, input): + stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11) + layer_name = [ + "conv1", "conv2", "conv3", "conv4", "conv5", "fc6", "fc7", "fc8" + ] + conv1 = fluid.layers.conv2d( + input=input, + num_filters=64, + filter_size=11, + stride=4, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[0] + "_weights")) + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool1.shape[1] * 5 * 5) + conv2 = fluid.layers.conv2d( + input=pool1, + num_filters=192, + filter_size=5, + stride=1, + padding=2, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[1] + "_weights")) + pool2 = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + stdv = 1.0 / math.sqrt(pool2.shape[1] * 3 * 3) + conv3 = fluid.layers.conv2d( + input=pool2, + num_filters=384, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[2] + "_weights")) + + stdv = 1.0 / math.sqrt(conv3.shape[1] * 3 * 3) + conv4 = fluid.layers.conv2d( + input=conv3, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[3] + "_weights")) + + stdv = 1.0 / math.sqrt(conv4.shape[1] * 3 * 3) + conv5 = fluid.layers.conv2d( + input=conv4, + num_filters=256, + filter_size=3, + stride=1, + padding=1, + groups=1, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[4] + "_weights")) + pool5 = fluid.layers.pool2d( + input=conv5, + pool_size=3, + pool_stride=2, + pool_padding=0, + pool_type='max') + + drop6 = fluid.layers.dropout(x=pool5, dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop6.shape[1] * drop6.shape[2] * + drop6.shape[3] * 1.0) + + fc6 = fluid.layers.fc( + input=drop6, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[5] + "_weights")) + drop7 = fluid.layers.dropout(x=fc6,
dropout_prob=0.5) + stdv = 1.0 / math.sqrt(drop7.shape[1] * 1.0) + + fc7 = fluid.layers.fc( + input=drop7, + size=4096, + act='relu', + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[6] + "_weights")) + + stdv = 1.0 / math.sqrt(fc7.shape[1] * 1.0) + out = fluid.layers.fc( + input=fc7, + size=self.num_classes, + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_offset"), + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=layer_name[7] + "_weights")) + return out diff --git a/paddlex/cv/nets/darknet.py b/paddlex/cv/nets/darknet.py index 345f1c114228fbd402f554df6cdf5d77829a3eab..71c296c3127f855718df85a3a606e10c5cf49e25 100644 --- a/paddlex/cv/nets/darknet.py +++ b/paddlex/cv/nets/darknet.py @@ -68,13 +68,14 @@ class DarkNet(object): bias_attr=False) bn_name = name + ".bn" - + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(float(self.norm_decay)) bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') + regularizer=regularizer, name=bn_name + '.scale') bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') + regularizer=regularizer, name=bn_name + '.offset') out = fluid.layers.batch_norm( input=conv, @@ -182,4 +183,4 @@ class DarkNet(object): bias_attr=ParamAttr(name='fc_offset')) return out - return blocks \ No newline at end of file + return blocks diff --git a/paddlex/cv/nets/densenet.py b/paddlex/cv/nets/densenet.py index 5ec49a2601e3b9104bf50cb201ad30995aa0927c..76997c48de412e52cf914c32057f8a1bd0c06f9d 100644 --- a/paddlex/cv/nets/densenet.py +++ b/paddlex/cv/nets/densenet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
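Stepping back to the yolo_v3.py hunk above: the three new error messages all encode the single constraint warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch. A small self-contained sanity check of that relation (dataset size and batch size are made-up numbers):

    # Steps per epoch follow from the training set size and batch size.
    num_samples, batch_size = 12000, 8
    num_steps_each_epoch = num_samples // batch_size   # 1500
    warmup_steps, lr_decay_epochs = 1000, [210, 240]

    # The condition checked in train(): warmup must end before the first
    # learning-rate decay boundary.
    assert warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch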
from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -173,4 +173,4 @@ class DenseNet(object): bn_ac_conv = fluid.layers.dropout( x=bn_ac_conv, dropout_prob=dropout) bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1) - return bn_ac_conv \ No newline at end of file + return bn_ac_conv diff --git a/paddlex/cv/nets/detection/faster_rcnn.py b/paddlex/cv/nets/detection/faster_rcnn.py index 7e7bd63dcb64065a3c7616b2e344d7152cbfdfde..3a7cf75a187e88383a3d72f0a56c5e8758449505 100644 --- a/paddlex/cv/nets/detection/faster_rcnn.py +++ b/paddlex/cv/nets/detection/faster_rcnn.py @@ -21,7 +21,7 @@ import copy from paddle import fluid -from .fpn import FPN +from .fpn import (FPN, HRFPN) from .rpn_head import (RPNHead, FPNRPNHead) from .roi_extractor import (RoIAlign, FPNRoIAlign) from .bbox_head import (BBoxHead, TwoFCHead) @@ -82,7 +82,12 @@ class FasterRCNN(object): self.backbone = backbone self.mode = mode if with_fpn and fpn is None: - fpn = FPN() + if self.backbone.__class__.__name__.startswith('HRNet'): + fpn = HRFPN() + fpn.min_level = 2 + fpn.max_level = 6 + else: + fpn = FPN() self.fpn = fpn self.num_classes = num_classes if rpn_head is None: diff --git a/paddlex/cv/nets/detection/fpn.py b/paddlex/cv/nets/detection/fpn.py index 8fd843b149d38fc2f640aa34df9e26432a25899e..710304812a520f6775c1d241e7958c64a4536768 100644 --- a/paddlex/cv/nets/detection/fpn.py +++ b/paddlex/cv/nets/detection/fpn.py @@ -23,7 +23,7 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Xavier from paddle.fluid.regularizer import L2Decay -__all__ = ['FPN'] +__all__ = ['FPN', 'HRFPN'] def ConvNorm(input, @@ -219,8 +219,8 @@ class FPN(object): body_name = body_name_list[i] body_input = body_dict[body_name] top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) + fpn_inner_single = self._add_topdown_lateral(body_name, body_input, + top_output) self.fpn_inner_output[i] = fpn_inner_single fpn_dict = {} fpn_name_list = [] @@ -293,3 +293,107 @@ class FPN(object): spatial_scale.insert(0, spatial_scale[0] * 0.5) res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) return res_dict, spatial_scale + + +class HRFPN(object): + """ + HRFPN, the feature pyramid used with HRNet backbones; see https://arxiv.org/abs/1908.07919 + + Args: + num_chan (int): number of feature channels + pooling_type (str): pooling type of downsampling + share_conv (bool): whether to share the conv for different layers' reduction + spatial_scale (list): feature map scaling factor + """ + + def __init__( + self, + num_chan=256, + pooling_type="avg", + share_conv=False, + spatial_scale=[1. / 64, 1. / 32, 1. / 16, 1. / 8, 1.
/ 4], ): + self.num_chan = num_chan + self.pooling_type = pooling_type + self.share_conv = share_conv + self.spatial_scale = spatial_scale + + def get_output(self, body_dict): + num_out = len(self.spatial_scale) + body_name_list = list(body_dict.keys()) + + num_backbone_stages = len(body_name_list) + + outs = [] + outs.append(body_dict[body_name_list[0]]) + + # resize + for i in range(1, len(body_dict)): + resized = self.resize_input_tensor(body_dict[body_name_list[i]], + outs[0], 2**i) + outs.append(resized) + + # concat + out = fluid.layers.concat(outs, axis=1) + + # reduction + out = fluid.layers.conv2d( + input=out, + num_filters=self.num_chan, + filter_size=1, + stride=1, + padding=0, + param_attr=ParamAttr(name='hrfpn_reduction_weights'), + bias_attr=False) + + # conv + outs = [out] + for i in range(1, num_out): + outs.append( + self.pooling( + out, + size=2**i, + stride=2**i, + pooling_type=self.pooling_type)) + outputs = [] + + for i in range(num_out): + conv_name = "shared_fpn_conv" if self.share_conv else "shared_fpn_conv_" + str( + i) + conv = fluid.layers.conv2d( + input=outs[i], + num_filters=self.num_chan, + filter_size=3, + stride=1, + padding=1, + param_attr=ParamAttr(name=conv_name + "_weights"), + bias_attr=False) + outputs.append(conv) + + for idx in range(0, num_out - len(body_name_list)): + body_name_list.append("fpn_res5_sum_subsampled_{}x".format(2**( + idx + 1))) + + outputs = outputs[::-1] + body_name_list = body_name_list[::-1] + + res_dict = OrderedDict([(body_name_list[k], outputs[k]) + for k in range(len(body_name_list))]) + return res_dict, self.spatial_scale + + def resize_input_tensor(self, body_input, ref_output, scale): + shape = fluid.layers.shape(ref_output) + shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4]) + out_shape_ = shape_hw + out_shape = fluid.layers.cast(out_shape_, dtype='int32') + out_shape.stop_gradient = True + body_output = fluid.layers.resize_bilinear( + body_input, scale=scale, out_shape=out_shape) + return body_output + + def pooling(self, input, size, stride, pooling_type): + pool = fluid.layers.pool2d( + input=input, + pool_size=size, + pool_stride=stride, + pool_type=pooling_type) + return pool diff --git a/paddlex/cv/nets/detection/mask_rcnn.py b/paddlex/cv/nets/detection/mask_rcnn.py index 010bacfe921137c69802ff25a405c65ea2141f7f..b67f44a61db87fd858c032617df2f191bbbda69f 100644 --- a/paddlex/cv/nets/detection/mask_rcnn.py +++ b/paddlex/cv/nets/detection/mask_rcnn.py @@ -21,7 +21,7 @@ import copy import paddle.fluid as fluid -from .fpn import FPN +from .fpn import (FPN, HRFPN) from .rpn_head import (RPNHead, FPNRPNHead) from .roi_extractor import (RoIAlign, FPNRoIAlign) from .bbox_head import (BBoxHead, TwoFCHead) @@ -92,11 +92,15 @@ class MaskRCNN(object): self.backbone = backbone self.mode = mode if with_fpn and fpn is None: - fpn = FPN( - num_chan=num_chan, - min_level=min_level, - max_level=max_level, - spatial_scale=spatial_scale) + if self.backbone.__class__.__name__.startswith('HRNet'): + fpn = HRFPN() + fpn.min_level = 2 + fpn.max_level = 6 + else: + fpn = FPN(num_chan=num_chan, + min_level=min_level, + max_level=max_level, + spatial_scale=spatial_scale) self.fpn = fpn self.num_classes = num_classes if rpn_head is None: diff --git a/paddlex/cv/nets/hrnet.py b/paddlex/cv/nets/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..a7934d385d4a53fd936410e37d3896fe21cb17ee --- /dev/null +++ b/paddlex/cv/nets/hrnet.py @@ -0,0 +1,474 @@ +# Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +from paddle import fluid +from paddle.fluid.param_attr import ParamAttr +from paddle.fluid.framework import Variable +from paddle.fluid.regularizer import L2Decay + +from numbers import Integral +from paddle.fluid.initializer import MSRA +import math + +__all__ = ['HRNet'] + + +class HRNet(object): + def __init__(self, + width=40, + has_se=False, + freeze_at=0, + norm_type='bn', + freeze_norm=False, + norm_decay=0., + feature_maps=[2, 3, 4, 5], + num_classes=None): + super(HRNet, self).__init__() + + if isinstance(feature_maps, Integral): + feature_maps = [feature_maps] + + assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" + assert len(feature_maps) > 0, "need one or more feature maps" + assert norm_type in ['bn', 'sync_bn'] + + self.width = width + self.has_se = has_se + self.channels = { + 18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]], + 30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]], + 32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]], + 40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]], + 44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]], + 48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]], + 60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]], + 64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]], + } + + self.freeze_at = freeze_at + self.norm_type = norm_type + self.norm_decay = norm_decay + self.freeze_norm = freeze_norm + self.feature_maps = feature_maps + self.num_classes = num_classes + self.end_points = [] + return + + def net(self, input): + width = self.width + channels_2, channels_3, channels_4 = self.channels[width] + num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 + + x = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_1') + x = self.conv_bn_layer( + input=x, + filter_size=3, + num_filters=64, + stride=2, + if_act=True, + name='layer1_2') + + la1 = self.layer1(x, name='layer2') + tr1 = self.transition_layer([la1], [256], channels_2, name='tr1') + st2 = self.stage(tr1, num_modules_2, channels_2, name='st2') + tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2') + st3 = self.stage(tr2, num_modules_3, channels_3, name='st3') + tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3') + st4 = self.stage(tr3, num_modules_4, channels_4, name='st4') + + # classification + if self.num_classes: + last_cls = self.last_cls_out(x=st4, name='cls_head') + y = last_cls[0] + last_num_filters = [256, 512, 1024] + for i in range(3): + y = fluid.layers.elementwise_add( + last_cls[i + 1], + self.conv_bn_layer( + input=y, + filter_size=3, + num_filters=last_num_filters[i], + stride=2, + name='cls_head_add' + str(i + 1))) + + y = self.conv_bn_layer( + input=y, + filter_size=1, + num_filters=2048, + stride=1, + name='cls_head_last_conv') + pool = 
fluid.layers.pool2d( + input=y, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc( + input=pool, + size=self.num_classes, + param_attr=ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=ParamAttr(name='fc_offset')) + return out + + # segmentation + if self.feature_maps == "stage4": + return st4 + + self.end_points = st4 + return st4[-1] + + def layer1(self, input, name=None): + conv = input + for i in range(4): + conv = self.bottleneck_block( + conv, + num_filters=64, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1)) + return conv + + def transition_layer(self, x, in_channels, out_channels, name=None): + num_in = len(in_channels) + num_out = len(out_channels) + out = [] + for i in range(num_out): + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.conv_bn_layer( + x[i], + filter_size=3, + num_filters=out_channels[i], + name=name + '_layer_' + str(i + 1)) + out.append(residual) + else: + out.append(x[i]) + else: + residual = self.conv_bn_layer( + x[-1], + filter_size=3, + num_filters=out_channels[i], + stride=2, + name=name + '_layer_' + str(i + 1)) + out.append(residual) + return out + + def branches(self, x, block_num, channels, name=None): + out = [] + for i in range(len(channels)): + residual = x[i] + for j in range(block_num): + residual = self.basic_block( + residual, + channels[i], + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1)) + out.append(residual) + return out + + def fuse_layers(self, x, channels, multi_scale_output=True, name=None): + out = [] + for i in range(len(channels) if multi_scale_output else 1): + residual = x[i] + if self.feature_maps == "stage4": + shape = fluid.layers.shape(residual) + width = shape[-1] + height = shape[-2] + for j in range(len(channels)): + if j > i: + y = self.conv_bn_layer( + x[j], + filter_size=1, + num_filters=channels[i], + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) + if self.feature_maps == "stage4": + y = fluid.layers.resize_bilinear( + input=y, out_shape=[height, width]) + else: + y = fluid.layers.resize_nearest( + input=y, scale=2**(j - i)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + elif j < i: + y = x[j] + for k in range(i - j): + if k == i - j - 1: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[i], + stride=2, + if_act=False, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + else: + y = self.conv_bn_layer( + y, + filter_size=3, + num_filters=channels[j], + stride=2, + name=name + '_layer_' + str(i + 1) + '_' + + str(j + 1) + '_' + str(k + 1)) + residual = fluid.layers.elementwise_add( + x=residual, y=y, act=None) + + residual = fluid.layers.relu(residual) + out.append(residual) + return out + + def high_resolution_module(self, + x, + channels, + multi_scale_output=True, + name=None): + residual = self.branches(x, 4, channels, name=name) + out = self.fuse_layers( + residual, + channels, + multi_scale_output=multi_scale_output, + name=name) + return out + + def stage(self, + x, + num_modules, + channels, + multi_scale_output=True, + name=None): + out = x + for i in range(num_modules): + if i == num_modules - 1 and multi_scale_output == False: + out = self.high_resolution_module( + out, + channels, + multi_scale_output=False, + name=name + '_' + str(i + 1)) + else: + out = self.high_resolution_module( + out, channels, name=name + '_' + str(i + 1)) + + return out + 
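To make the stage wiring above concrete, a sketch of what self.channels encodes for the default width=18 (the values are copied from the table in __init__; the stride comment reflects HRNet's usual 1/4 to 1/32 pyramid):

    # Four parallel branches survive to stage4; widths grow as resolution drops.
    channels = {18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]]}
    stage2_ch, stage3_ch, stage4_ch = channels[18]
    assert stage4_ch == [18, 36, 72, 144]   # branch widths at strides 4/8/16/32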
+ def last_cls_out(self, x, name=None): + out = [] + num_filters_list = [32, 64, 128, 256] + for i in range(len(x)): + out.append( + self.bottleneck_block( + input=x[i], + num_filters=num_filters_list[i], + name=name + 'conv_' + str(i + 1), + downsample=True)) + return out + + def basic_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + if_act=False, + name=name + '_conv2') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def bottleneck_block(self, + input, + num_filters, + stride=1, + downsample=False, + name=None): + residual = input + conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + name=name + '_conv1') + conv = self.conv_bn_layer( + input=conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + name=name + '_conv2') + conv = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_conv3') + if downsample: + residual = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters * 4, + if_act=False, + name=name + '_downsample') + if self.has_se: + conv = self.squeeze_excitation( + input=conv, + num_channels=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') + + def squeeze_excitation(self, + input, + num_channels, + reduction_ratio, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc( + input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_sqz_weights'), + bias_attr=ParamAttr(name=name + '_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc( + input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_exc_weights'), + bias_attr=ParamAttr(name=name + '_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = self._bn(input=conv, bn_name=bn_name) + if if_act: + bn = fluid.layers.relu(bn) + return bn + + def _bn(self, input, act=None, bn_name=None): + norm_lr = 0. if self.freeze_norm else 1.
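+ # As in the DarkNet/MobileNetV1/ResNet hunks of this patch, weight decay + # on the BN scale and offset is dropped when the network is built as a + # classifier (num_classes set) or for the "stage4" segmentation mode, + # and kept (L2Decay(norm_decay)) for detection backbones.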
+ norm_decay = self.norm_decay + if self.num_classes or self.feature_maps == "stage4": + regularizer = None + pattr_initializer = fluid.initializer.Constant(1.0) + battr_initializer = fluid.initializer.Constant(0.0) + else: + regularizer = L2Decay(norm_decay) + pattr_initializer = None + battr_initializer = None + pattr = ParamAttr( + name=bn_name + '_scale', + learning_rate=norm_lr, + regularizer=regularizer, + initializer=pattr_initializer) + battr = ParamAttr( + name=bn_name + '_offset', + learning_rate=norm_lr, + regularizer=regularizer, + initializer=battr_initializer) + + global_stats = True if self.freeze_norm else False + out = fluid.layers.batch_norm( + input=input, + act=act, + name=bn_name + '.output.1', + param_attr=pattr, + bias_attr=battr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', + use_global_stats=global_stats) + scale = fluid.framework._get_var(pattr.name) + bias = fluid.framework._get_var(battr.name) + if self.freeze_norm: + scale.stop_gradient = True + bias.stop_gradient = True + return out + + def __call__(self, input): + assert isinstance(input, Variable) + if isinstance(self.feature_maps, (list, tuple)): + assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ + "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) + + res_endpoints = [] + + res = input + feature_maps = self.feature_maps + out = self.net(input) + if self.num_classes or self.feature_maps == "stage4": + return out + + for i in feature_maps: + res = self.end_points[i - 2] + if i in self.feature_maps: + res_endpoints.append(res) + if self.freeze_at >= i: + res.stop_gradient = True + + return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) + for idx, feat in enumerate(res_endpoints)]) diff --git a/paddlex/cv/nets/mobilenet_v1.py b/paddlex/cv/nets/mobilenet_v1.py index 3ee1a8c1561cdc0308c84a1a79ab223763b17148..c9b99255fb36eb9a9b44ea12ba5ed3c099620db4 100755 --- a/paddlex/cv/nets/mobilenet_v1.py +++ b/paddlex/cv/nets/mobilenet_v1.py @@ -79,10 +79,14 @@ class MobileNetV1(object): bn_name = name + "_bn" norm_decay = self.norm_decay + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(norm_decay) bn_param_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_scale') + regularizer=regularizer, name=bn_name + '_scale') bn_bias_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_offset') + regularizer=regularizer, name=bn_name + '_offset') return fluid.layers.batch_norm( input=conv, act=act, @@ -189,12 +193,12 @@ class MobileNetV1(object): if self.num_classes: out = fluid.layers.pool2d( input=out, pool_type='avg', global_pooling=True) - output = fluid.layers.fc( - input=out, - size=self.num_classes, - param_attr=ParamAttr( - initializer=fluid.initializer.MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) + output = fluid.layers.fc(input=out, + size=self.num_classes, + param_attr=ParamAttr( + initializer=fluid.initializer.MSRA(), + name="fc7_weights"), + bias_attr=ParamAttr(name="fc7_offset")) return output if not self.with_extra_blocks: @@ -213,4 +217,4 @@ class MobileNetV1(object): module17 = self._extra_block(module16, num_filters[3][0], num_filters[3][1], 1, 2, self.prefix_name + "conv7_4") - return module11, module13, module14, module15, module16, module17 \ No newline at end of file + return module11, module13, module14, module15, module16, module17 diff --git a/paddlex/cv/nets/mobilenet_v3.py b/paddlex/cv/nets/mobilenet_v3.py index 
5f85ccda1f119a4b14bdf3e114bbe5b88a1b46fe..6adcee03d7bb9c5ffab0ceb7198083e3534e7ab9 100644 --- a/paddlex/cv/nets/mobilenet_v3.py +++ b/paddlex/cv/nets/mobilenet_v3.py @@ -31,6 +31,7 @@ class MobileNetV3(): with_extra_blocks (bool): if extra blocks should be added. extra_block_filters (list): number of filter for each extra block. """ + def __init__(self, scale=1.0, model_name='small', @@ -113,29 +114,36 @@ class MobileNetV3(): lr_idx = self.curr_stage // self.lr_interval lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) lr_mult = self.lr_mult_list[lr_idx] - conv_param_attr = ParamAttr(name=name + '_weights', - learning_rate=lr_mult, - regularizer=L2Decay(self.conv_decay)) - conv = fluid.layers.conv2d(input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=conv_param_attr, - bias_attr=False) + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(self.conv_decay) + conv_param_attr = ParamAttr( + name=name + '_weights', + learning_rate=lr_mult, + regularizer=regularizer) + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=conv_param_attr, + bias_attr=False) bn_name = name + '_bn' - bn_param_attr = ParamAttr(name=bn_name + "_scale", - regularizer=L2Decay(self.norm_decay)) - bn_bias_attr = ParamAttr(name=bn_name + "_offset", - regularizer=L2Decay(self.norm_decay)) - bn = fluid.layers.batch_norm(input=conv, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + bn_param_attr = ParamAttr( + name=bn_name + "_scale", regularizer=L2Decay(self.norm_decay)) + bn_bias_attr = ParamAttr( + name=bn_name + "_offset", regularizer=L2Decay(self.norm_decay)) + bn = fluid.layers.batch_norm( + input=conv, + param_attr=bn_param_attr, + bias_attr=bn_bias_attr, + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') if if_act: if act == 'relu': bn = fluid.layers.relu(bn) @@ -152,12 +160,10 @@ class MobileNetV3(): lr_idx = self.curr_stage // self.lr_interval lr_idx = min(lr_idx, len(self.lr_mult_list) - 1) lr_mult = self.lr_mult_list[lr_idx] - + num_mid_filter = int(num_out_filter // ratio) - pool = fluid.layers.pool2d(input=input, - pool_type='avg', - global_pooling=True, - use_cudnn=False) + pool = fluid.layers.pool2d( + input=input, pool_type='avg', global_pooling=True, use_cudnn=False) conv1 = fluid.layers.conv2d( input=pool, filter_size=1, @@ -191,43 +197,46 @@ class MobileNetV3(): use_se=False, name=None): input_data = input - conv0 = self._conv_bn_layer(input=input, - filter_size=1, - num_filters=num_mid_filter, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + '_expand') + conv0 = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_mid_filter, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + '_expand') if self.block_stride == 16 and stride == 2: self.end_points.append(conv0) - conv1 = self._conv_bn_layer(input=conv0, - filter_size=filter_size, - num_filters=num_mid_filter, - stride=stride, - padding=int((filter_size - 1) // 2), - if_act=True, - act=act, - num_groups=num_mid_filter, - use_cudnn=False, - name=name + '_depthwise') + conv1 = self._conv_bn_layer( + input=conv0, + filter_size=filter_size, + num_filters=num_mid_filter, + stride=stride, + 
padding=int((filter_size - 1) // 2), + if_act=True, + act=act, + num_groups=num_mid_filter, + use_cudnn=False, + name=name + '_depthwise') if use_se: - conv1 = self._se_block(input=conv1, - num_out_filter=num_mid_filter, - name=name + '_se') + conv1 = self._se_block( + input=conv1, num_out_filter=num_mid_filter, name=name + '_se') - conv2 = self._conv_bn_layer(input=conv1, - filter_size=1, - num_filters=num_out_filter, - stride=1, - padding=0, - if_act=False, - name=name + '_linear') + conv2 = self._conv_bn_layer( + input=conv1, + filter_size=1, + num_filters=num_out_filter, + stride=1, + padding=0, + if_act=False, + name=name + '_linear') if num_in_filter != num_out_filter or stride != 1: return conv2 else: - return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None) + return fluid.layers.elementwise_add( + x=input_data, y=conv2, act=None) def _extra_block_dw(self, input, @@ -235,29 +244,32 @@ class MobileNetV3(): num_filters2, stride, name=None): - pointwise_conv = self._conv_bn_layer(input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - padding="SAME", - act='relu6', - name=name + "_extra1") - depthwise_conv = self._conv_bn_layer(input=pointwise_conv, - filter_size=3, - num_filters=int(num_filters2), - stride=stride, - padding="SAME", - num_groups=int(num_filters1), - act='relu6', - use_cudnn=False, - name=name + "_extra2_dw") - normal_conv = self._conv_bn_layer(input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2), - stride=1, - padding="SAME", - act='relu6', - name=name + "_extra2_sep") + pointwise_conv = self._conv_bn_layer( + input=input, + filter_size=1, + num_filters=int(num_filters1), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra1") + depthwise_conv = self._conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=int(num_filters2), + stride=stride, + padding="SAME", + num_groups=int(num_filters1), + act='relu6', + use_cudnn=False, + name=name + "_extra2_dw") + normal_conv = self._conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2), + stride=1, + padding="SAME", + act='relu6', + name=name + "_extra2_sep") return normal_conv def __call__(self, input): @@ -282,36 +294,39 @@ class MobileNetV3(): self.block_stride *= layer_cfg[5] if layer_cfg[5] == 2: blocks.append(conv) - conv = self._residual_unit(input=conv, - num_in_filter=inplanes, - num_mid_filter=int(scale * layer_cfg[1]), - num_out_filter=int(scale * layer_cfg[2]), - act=layer_cfg[4], - stride=layer_cfg[5], - filter_size=layer_cfg[0], - use_se=layer_cfg[3], - name='conv' + str(i + 2)) - + conv = self._residual_unit( + input=conv, + num_in_filter=inplanes, + num_mid_filter=int(scale * layer_cfg[1]), + num_out_filter=int(scale * layer_cfg[2]), + act=layer_cfg[4], + stride=layer_cfg[5], + filter_size=layer_cfg[0], + use_se=layer_cfg[3], + name='conv' + str(i + 2)) + inplanes = int(scale * layer_cfg[2]) i += 1 self.curr_stage = i blocks.append(conv) if self.num_classes: - conv = self._conv_bn_layer(input=conv, - filter_size=1, - num_filters=int(scale * self.cls_ch_squeeze), - stride=1, - padding=0, - num_groups=1, - if_act=True, - act='hard_swish', - name='conv_last') - - conv = fluid.layers.pool2d(input=conv, - pool_type='avg', - global_pooling=True, - use_cudnn=False) + conv = self._conv_bn_layer( + input=conv, + filter_size=1, + num_filters=int(scale * self.cls_ch_squeeze), + stride=1, + padding=0, + num_groups=1, + if_act=True, + act='hard_swish', + name='conv_last') + + conv = fluid.layers.pool2d( + 
input=conv, + pool_type='avg', + global_pooling=True, + use_cudnn=False) conv = fluid.layers.conv2d( input=conv, num_filters=self.cls_ch_expand, @@ -326,22 +341,23 @@ class MobileNetV3(): out = fluid.layers.fc(input=drop, size=self.num_classes, param_attr=ParamAttr(name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) + bias_attr=ParamAttr(name='fc_offset')) return out if not self.with_extra_blocks: return blocks # extra block - conv_extra = self._conv_bn_layer(conv, - filter_size=1, - num_filters=int(scale * cfg[-1][1]), - stride=1, - padding="SAME", - num_groups=1, - if_act=True, - act='hard_swish', - name='conv' + str(i + 2)) + conv_extra = self._conv_bn_layer( + conv, + filter_size=1, + num_filters=int(scale * cfg[-1][1]), + stride=1, + padding="SAME", + num_groups=1, + if_act=True, + act='hard_swish', + name='conv' + str(i + 2)) self.end_points.append(conv_extra) i += 1 for block_filter in self.extra_block_filters: diff --git a/paddlex/cv/nets/resnet.py b/paddlex/cv/nets/resnet.py index 40c6965fccbc823f06abc8270c30d7c3e7e21c74..ff7a8d17ac9862f319d81ddcc5cb938918677692 100644 --- a/paddlex/cv/nets/resnet.py +++ b/paddlex/cv/nets/resnet.py @@ -135,8 +135,10 @@ class ResNet(object): filter_size=filter_size, stride=stride, padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), + param_attr=ParamAttr( + initializer=Constant(0.0), name=name + ".w_0"), + bias_attr=ParamAttr( + initializer=Constant(0.0), name=name + ".b_0"), act=act, name=name) return out @@ -151,7 +153,8 @@ class ResNet(object): name=None, dcn_v2=False, use_lr_mult_list=False): - lr_mult = self.lr_mult_list[self.curr_stage] if use_lr_mult_list else 1.0 + lr_mult = self.lr_mult_list[ + self.curr_stage] if use_lr_mult_list else 1.0 _name = self.prefix_name + name if self.prefix_name != '' else name if not dcn_v2: conv = fluid.layers.conv2d( @@ -162,8 +165,8 @@ class ResNet(object): padding=(filter_size - 1) // 2, groups=groups, act=None, - param_attr=ParamAttr(name=_name + "_weights", - learning_rate=lr_mult), + param_attr=ParamAttr( + name=_name + "_weights", learning_rate=lr_mult), bias_attr=False, name=_name + '.conv2d.output.1') else: @@ -202,14 +205,18 @@ class ResNet(object): norm_lr = 0. 
if self.freeze_norm else lr_mult norm_decay = self.norm_decay + if self.num_classes: + regularizer = None + else: + regularizer = L2Decay(norm_decay) pattr = ParamAttr( name=bn_name + '_scale', learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) + regularizer=regularizer) battr = ParamAttr( name=bn_name + '_offset', learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) + regularizer=regularizer) if self.norm_type in ['bn', 'sync_bn']: global_stats = True if self.freeze_norm else False @@ -262,8 +269,8 @@ class ResNet(object): pool_padding=0, ceil_mode=True, pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name, - use_lr_mult_list=True) + return self._conv_norm( + input, ch_out, 1, 1, name=name, use_lr_mult_list=True) return self._conv_norm(input, ch_out, 1, stride, name=name) else: return input diff --git a/paddlex/cv/nets/segmentation/__init__.py b/paddlex/cv/nets/segmentation/__init__.py index f5af8c95426abb9c7b181ff8c717fe99edbf9760..8c7d9674ae79a3ee6145c1c92612498ac7340faa 100644 --- a/paddlex/cv/nets/segmentation/__init__.py +++ b/paddlex/cv/nets/segmentation/__init__.py @@ -14,5 +14,7 @@ from .unet import UNet from .deeplabv3p import DeepLabv3p +from .hrnet import HRNet +from .fast_scnn import FastSCNN from .model_utils import libs from .model_utils import loss diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py index 08dad240c0b28d6e6e13845dcc0c9148c442014f..4f9e538a28abe37481b96a517019b252f6d96f45 100644 --- a/paddlex/cv/nets/segmentation/deeplabv3p.py +++ b/paddlex/cv/nets/segmentation/deeplabv3p.py @@ -28,7 +28,6 @@ from .model_utils.libs import sigmoid_to_softmax from .model_utils.loss import softmax_with_loss from .model_utils.loss import dice_loss from .model_utils.loss import bce_loss -import paddlex.utils.logging as logging from paddlex.cv.nets.xception import Xception from paddlex.cv.nets.mobilenet_v2 import MobileNetV2 @@ -135,7 +134,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('encoder'): channel = 256 with scope("image_pool"): @@ -151,8 +151,8 @@ class DeepLabv3p(object): padding=0, param_attr=param_attr)) input_shape = fluid.layers.shape(input) - image_avg = fluid.layers.resize_bilinear( - image_avg, input_shape[2:]) + image_avg = fluid.layers.resize_bilinear(image_avg, + input_shape[2:]) with scope("aspp0"): aspp0 = bn_relu( @@ -244,7 +244,8 @@ class DeepLabv3p(object): param_attr = fluid.ParamAttr( name=name_scope + 'weights', regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.06)) with scope('decoder'): with scope('concat'): decode_shortcut = bn_relu( @@ -326,9 +327,6 @@ class DeepLabv3p(object): if self.mode == 'train': inputs['label'] = fluid.data( dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') return inputs def build_net(self, inputs): @@ -351,7 +349,8 @@ class DeepLabv3p(object): name=name_scope + 'weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.01)) with 
scope('logit'): with fluid.name_scope('last_conv'): logit = conv( diff --git a/paddlex/cv/nets/segmentation/fast_scnn.py b/paddlex/cv/nets/segmentation/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..71866e56df9adf31c45d841a7bcde3a062c3067a --- /dev/null +++ b/paddlex/cv/nets/segmentation/fast_scnn.py @@ -0,0 +1,395 @@ +# coding: utf8 +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +import paddle.fluid as fluid +from .model_utils.libs import scope +from .model_utils.libs import bn, bn_relu, relu, conv_bn_layer +from .model_utils.libs import conv, avg_pool +from .model_utils.libs import separate_conv +from .model_utils.libs import sigmoid_to_softmax +from .model_utils.loss import softmax_with_loss +from .model_utils.loss import dice_loss +from .model_utils.loss import bce_loss + + +class FastSCNN(object): + def __init__(self, + num_classes, + mode='train', + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + multi_loss_weight=[1.0], + ignore_index=255, + fixed_input_shape=None): + # dice_loss或bce_loss只适用两类分割中 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss are only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is a string, it must be 'dynamic'!") + else: + raise TypeError( + 'Expected class_weight to be a list or string, but received {}'.
+ format(type(class_weight))) + + self.num_classes = num_classes + self.mode = mode + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.multi_loss_weight = multi_loss_weight + self.fixed_input_shape = fixed_input_shape + + def build_net(self, inputs): + if self.use_dice_loss or self.use_bce_loss: + self.num_classes = 1 + image = inputs['image'] + size = fluid.layers.shape(image)[2:] + with scope('learning_to_downsample'): + higher_res_features = self._learning_to_downsample(image, 32, 48, + 64) + with scope('global_feature_extractor'): + lower_res_feature = self._global_feature_extractor( + higher_res_features, 64, [64, 96, 128], 128, 6, [3, 3, 3]) + with scope('feature_fusion'): + x = self._feature_fusion(higher_res_features, lower_res_feature, + 64, 128, 128) + with scope('classifier'): + logit = self._classifier(x, 128) + logit = fluid.layers.resize_bilinear(logit, size, align_mode=0) + + if len(self.multi_loss_weight) == 3: + with scope('aux_layer_higher'): + higher_logit = self._aux_layer(higher_res_features, + self.num_classes) + higher_logit = fluid.layers.resize_bilinear( + higher_logit, size, align_mode=0) + with scope('aux_layer_lower'): + lower_logit = self._aux_layer(lower_res_feature, + self.num_classes) + lower_logit = fluid.layers.resize_bilinear( + lower_logit, size, align_mode=0) + logit = (logit, higher_logit, lower_logit) + elif len(self.multi_loss_weight) == 2: + with scope('aux_layer_higher'): + higher_logit = self._aux_layer(higher_res_features, + self.num_classes) + higher_logit = fluid.layers.resize_bilinear( + higher_logit, size, align_mode=0) + logit = (logit, higher_logit) + else: + logit = (logit, ) + + if self.num_classes == 1: + out = sigmoid_to_softmax(logit[0]) + out = fluid.layers.transpose(out, [0, 2, 3, 1]) + else: + out = fluid.layers.transpose(logit[0], [0, 2, 3, 1]) + + pred = fluid.layers.argmax(out, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + + if self.mode == 'train': + label = inputs['label'] + return self._get_loss(logit, label) + elif self.mode == 'eval': + label = inputs['label'] + mask = label != self.ignore_index + loss = self._get_loss(logit, label) + return loss, pred, label, mask + else: + if self.num_classes == 1: + logit = sigmoid_to_softmax(logit[0]) + else: + logit = fluid.layers.softmax(logit[0], axis=1) + return pred, logit + + def generate_inputs(self): + inputs = OrderedDict() + if self.fixed_input_shape is not None: + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] + inputs['image'] = fluid.data( + dtype='float32', shape=input_shape, name='image') + else: + inputs['image'] = fluid.data( + dtype='float32', shape=[None, 3, None, None], name='image') + if self.mode == 'train': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + elif self.mode == 'eval': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + return inputs + + def _get_loss(self, logits, label): + avg_loss = 0 + if not (self.use_dice_loss or self.use_bce_loss): + for i, logit in enumerate(logits): + logit_mask = ( + label.astype('int32') != self.ignore_index).astype('int32') + loss = softmax_with_loss( + logit, + label, + logit_mask, + num_classes=self.num_classes, + weight=self.class_weight, + ignore_index=self.ignore_index) + avg_loss += self.multi_loss_weight[i] * loss + else: + if self.use_dice_loss: + for i, logit in enumerate(logits): + logit_mask = (label.astype('int32') !=
self.ignore_index + ).astype('int32') + loss = dice_loss(logit, label, logit_mask) + avg_loss += self.multi_loss_weight[i] * loss + if self.use_bce_loss: + for i, logit in enumerate(logits): + #logit_label = fluid.layers.resize_nearest(label, logit_shape[2:]) + logit_mask = (label.astype('int32') != self.ignore_index + ).astype('int32') + loss = bce_loss( + logit, + label, + logit_mask, + ignore_index=self.ignore_index) + avg_loss += self.multi_loss_weight[i] * loss + return avg_loss + + def _learning_to_downsample(self, + x, + dw_channels1=32, + dw_channels2=48, + out_channels=64): + x = relu(bn(conv(x, dw_channels1, 3, 2))) + with scope('dsconv1'): + x = separate_conv( + x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu) + with scope('dsconv2'): + x = separate_conv( + x, out_channels, stride=2, filter=3, act=fluid.layers.relu) + return x + + def _shortcut(self, input, data_residual): + return fluid.layers.elementwise_add(input, data_residual) + + def _dropout2d(self, input, prob, is_train=False): + if not is_train: + return input + keep_prob = 1.0 - prob + shape = fluid.layers.shape(input) + channels = shape[1] + random_tensor = keep_prob + fluid.layers.uniform_random( + [shape[0], channels, 1, 1], min=0., max=1.) + binary_tensor = fluid.layers.floor(random_tensor) + output = input / keep_prob * binary_tensor + return output + + def _inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + padding, + expansion_factor, + name=None): + num_expfilter = int(round(num_in_filter * expansion_factor)) + + channel_expand = conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name=name + '_expand') + + bottleneck_conv = conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + if_act=True, + name=name + '_dwise', + use_cudnn=False) + + depthwise_output = bottleneck_conv + + linear_out = conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=False, + name=name + '_linear') + + if ifshortcut: + out = self._shortcut(input=input, data_residual=linear_out) + return out, depthwise_output + else: + return linear_out, depthwise_output + + def _inverted_blocks(self, input, in_c, t, c, n, s, name=None): + first_block, depthwise_output = self._inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block, depthwise_output = self._inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block, depthwise_output + + def _psp_module(self, input, out_features): + + cat_layers = [] + sizes = (1, 2, 3, 6) + for size in sizes: + psp_name = "psp" + str(size) + with scope(psp_name): + pool = fluid.layers.adaptive_pool2d( + input, + pool_size=[size, size], + pool_type='avg', + name=psp_name + '_adapool') + data = conv( + pool, + out_features, + filter_size=1, + bias_attr=False, + name=psp_name + '_conv') + data_bn = bn(data, act='relu') + interp = fluid.layers.resize_bilinear( + data_bn, + 
out_shape=fluid.layers.shape(input)[2:], + name=psp_name + '_interp', + align_mode=0) + cat_layers.append(interp) + cat_layers = [input] + cat_layers + out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat') + + return out + + def _aux_layer(self, x, num_classes): + x = relu(bn(conv(x, 32, 3, padding=1))) + x = self._dropout2d(x, 0.1, is_train=(self.mode == 'train')) + with scope('logit'): + x = conv(x, num_classes, 1, bias_attr=True) + return x + + def _feature_fusion(self, + higher_res_feature, + lower_res_feature, + higher_in_channels, + lower_in_channels, + out_channels, + scale_factor=4): + shape = fluid.layers.shape(higher_res_feature) + w = shape[-1] + h = shape[-2] + lower_res_feature = fluid.layers.resize_bilinear( + lower_res_feature, [h, w], align_mode=0) + + with scope('dwconv'): + lower_res_feature = relu( + bn(conv(lower_res_feature, out_channels, + 1))) #(lower_res_feature) + with scope('conv_lower_res'): + lower_res_feature = bn( + conv( + lower_res_feature, out_channels, 1, bias_attr=True)) + with scope('conv_higher_res'): + higher_res_feature = bn( + conv( + higher_res_feature, out_channels, 1, bias_attr=True)) + out = higher_res_feature + lower_res_feature + + return relu(out) + + def _global_feature_extractor(self, + x, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + t=6, + num_blocks=(3, 3, 3)): + x, _ = self._inverted_blocks(x, in_channels, t, block_channels[0], + num_blocks[0], 2, 'inverted_block_1') + x, _ = self._inverted_blocks(x, block_channels[0], t, + block_channels[1], num_blocks[1], 2, + 'inverted_block_2') + x, _ = self._inverted_blocks(x, block_channels[1], t, + block_channels[2], num_blocks[2], 1, + 'inverted_block_3') + x = self._psp_module(x, block_channels[2] // 4) + + with scope('out'): + x = relu(bn(conv(x, out_channels, 1))) + + return x + + def _classifier(self, x, dw_channels, stride=1): + with scope('dsconv1'): + x = separate_conv( + x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu) + with scope('dsconv2'): + x = separate_conv( + x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu) + + x = self._dropout2d(x, 0.1, is_train=self.mode == 'train') + x = conv(x, self.num_classes, 1, bias_attr=True) + return x diff --git a/paddlex/cv/nets/segmentation/hrnet.py b/paddlex/cv/nets/segmentation/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6c7d8d93692e40047fa4ceb2f4153c18cee06ccd --- /dev/null +++ b/paddlex/cv/nets/segmentation/hrnet.py @@ -0,0 +1,205 @@ +# coding: utf8 +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
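+ +# A minimal usage sketch of the class defined below (illustrative only; the +# surrounding Paddle program, executor and optimizer setup are omitted): +# +# net = HRNet(num_classes=2, mode='train', width=18) +# inputs = net.generate_inputs() # 'image' and 'label' data holders +# loss = net.build_net(inputs) # scalar training loss to minimize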
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr +from .model_utils.libs import sigmoid_to_softmax +from .model_utils.loss import softmax_with_loss +from .model_utils.loss import dice_loss +from .model_utils.loss import bce_loss +import paddlex + + +class HRNet(object): + def __init__(self, + num_classes, + mode='train', + width=18, + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + ignore_index=255, + fixed_input_shape=None): + # dice_loss and bce_loss are only applicable to binary segmentation + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss are only applicable to binary classification" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to the number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is a string, it must be 'dynamic'!") + else: + raise TypeError( + 'Expected class_weight to be a list or string, but received {}'. + format(type(class_weight))) + + self.num_classes = num_classes + self.mode = mode + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.fixed_input_shape = fixed_input_shape + self.backbone = paddlex.cv.nets.hrnet.HRNet( + width=width, feature_maps="stage4") + + def build_net(self, inputs): + if self.use_dice_loss or self.use_bce_loss: + self.num_classes = 1 + image = inputs['image'] + st4 = self.backbone(image) + # upsample the lower-resolution stage-4 outputs to match st4[0] + shape = fluid.layers.shape(st4[0])[-2:] + st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape) + st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape) + st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) + + out = fluid.layers.concat(st4, axis=1) + last_channels = sum(self.backbone.channels[self.backbone.width][-1]) + + out = self._conv_bn_layer( + input=out, + filter_size=1, + num_filters=last_channels, + stride=1, + if_act=True, + name='conv-2') + out = fluid.layers.conv2d( + input=out, + num_filters=self.num_classes, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name='conv-1_weights'), + bias_attr=False) + + input_shape = fluid.layers.shape(image)[-2:] + logit = fluid.layers.resize_bilinear(out, input_shape) + + if self.num_classes == 1: + out = sigmoid_to_softmax(logit) + out = fluid.layers.transpose(out, [0, 2, 3, 1]) + else: + out = fluid.layers.transpose(logit, [0, 2, 3, 1]) + + pred = fluid.layers.argmax(out, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + + if self.mode == 'train': + label = inputs['label'] + mask = label != self.ignore_index + return self._get_loss(logit, label, mask) + elif self.mode == 'eval': + label = inputs['label'] + mask = label != self.ignore_index + loss = self._get_loss(logit, label, mask) + return loss, pred, label, mask + else: + if self.num_classes == 1: + logit = sigmoid_to_softmax(logit) + else: + logit = fluid.layers.softmax(logit, axis=1) + return pred, logit + + def generate_inputs(self): + inputs = OrderedDict() + + if self.fixed_input_shape is not None: + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] + inputs['image'] = fluid.data( 
+ dtype='float32', shape=input_shape, name='image') + else: + inputs['image'] = fluid.data( + dtype='float32', shape=[None, 3, None, None], name='image') + if self.mode == 'train': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + elif self.mode == 'eval': + inputs['label'] = fluid.data( + dtype='int32', shape=[None, 1, None, None], name='label') + return inputs + + def _get_loss(self, logit, label, mask): + avg_loss = 0 + if not (self.use_dice_loss or self.use_bce_loss): + avg_loss += softmax_with_loss( + logit, + label, + mask, + num_classes=self.num_classes, + weight=self.class_weight, + ignore_index=self.ignore_index) + else: + if self.use_dice_loss: + avg_loss += dice_loss(logit, label, mask) + if self.use_bce_loss: + avg_loss += bce_loss( + logit, label, mask, ignore_index=self.ignore_index) + + return avg_loss + + def _conv_bn_layer(self, + input, + filter_size, + num_filters, + stride=1, + padding=1, + num_groups=1, + if_act=True, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + act=None, + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr( + name=bn_name + "_scale", + initializer=fluid.initializer.Constant(1.0)), + bias_attr=ParamAttr( + name=bn_name + "_offset", + initializer=fluid.initializer.Constant(0.0)), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + bn = fluid.layers.relu(bn) + return bn diff --git a/paddlex/cv/nets/segmentation/unet.py b/paddlex/cv/nets/segmentation/unet.py index 899ba1621ea39d7e11623259ed90744f823b179c..f3675ea5124d684809ebaf1138545a4ea6fd6cec 100644 --- a/paddlex/cv/nets/segmentation/unet.py +++ b/paddlex/cv/nets/segmentation/unet.py @@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax from .model_utils.loss import softmax_with_loss from .model_utils.loss import dice_loss from .model_utils.loss import bce_loss -import paddlex.utils.logging as logging class UNet(object): @@ -106,7 +105,8 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.33)) with scope("conv0"): data = bn_relu( conv( @@ -140,8 +140,7 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.XavierInitializer(), - ) + initializer=fluid.initializer.XavierInitializer(), ) with scope("up"): if self.upsample_mode == 'bilinear': short_cut_shape = fluid.layers.shape(short_cut) @@ -197,7 +196,8 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.01)) with scope("logit"): data = conv( data, diff --git a/paddlex/cv/nets/shufflenet_v2.py b/paddlex/cv/nets/shufflenet_v2.py index f663535c31f9c899799b696351ba415049c83bf6..23045ee0d7279011ad93160e778dfd88862b9953 100644 --- a/paddlex/cv/nets/shufflenet_v2.py +++ b/paddlex/cv/nets/shufflenet_v2.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division @@ -269,4 +269,4 @@ class ShuffleNetV2(): name='stage_' + name + '_conv3') out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - return self.channel_shuffle(out, 2) \ No newline at end of file + return self.channel_shuffle(out, 2) diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py index 55f58fcbefd7bc9a8e426aa3aa100dc0807f0f71..dbcd34222daf71c05c8f26a2a38c94faacb526f2 100644 --- a/paddlex/cv/transforms/cls_transforms.py +++ b/paddlex/cv/transforms/cls_transforms.py @@ -18,6 +18,7 @@ import random import os.path as osp import numpy as np from PIL import Image, ImageEnhance +import paddlex.utils.logging as logging class ClsTransform: @@ -92,6 +93,16 @@ class Compose(ClsTransform): outputs = (im, label) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + class RandomCrop(ClsTransform): """对图像进行随机剪裁,模型训练时的数据增强操作。 @@ -103,14 +114,14 @@ class RandomCrop(ClsTransform): Args: crop_size (int): 随机裁剪后重新调整的目标边长。默认为224。 - lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.88。 + lower_scale (float): 裁剪面积相对原面积比例的最小限制。默认为0.08。 lower_ratio (float): 宽变换比例的最小限制。默认为3. / 4。 upper_ratio (float): 宽变换比例的最大限制。默认为4. / 3。 """ def __init__(self, crop_size=224, - lower_scale=0.88, + lower_scale=0.08, lower_ratio=3. / 4, upper_ratio=4. / 3): self.crop_size = crop_size @@ -461,3 +472,56 @@ class ArrangeClassifier(ClsTransform): else: outputs = (im, ) return outputs + + +class ComposedClsTransforms(Compose): + """ 分类模型的基础Transforms流程,具体如下 + 训练阶段: + 1. 随机从图像中crop一块子图,并resize成crop_size大小 + 2. 将1的输出按0.5的概率随机进行水平翻转 + 3. 将图像进行归一化 + 验证/预测阶段: + 1. 将图像按比例Resize,使得最小边长度为crop_size[0] * 1.14 + 2. 从图像中心crop出一个大小为crop_size的图像 + 3. 
将图像进行归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + crop_size(int|list): 输入模型里的图像大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + crop_size=[224, 224], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = crop_size + if isinstance(crop_size, list): + if crop_size[0] != crop_size[1]: + raise Exception( + "In classifier model, width and height should be equal, please modify your parameter `crop_size`" + ) + width = crop_size[0] + if width % 32 != 0: + raise Exception( + "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`" + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5), + Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + ResizeByShort(short_size=int(width * 1.14)), + CenterCrop(crop_size=width), Normalize( + mean=mean, std=std) + ] + + super(ComposedClsTransforms, self).__init__(transforms) diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index ca892d75bc7913cd1e238bb96e82e76f9a4fd716..0b96d6b4d32f245ec4315851d8edd221776bb6a0 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -27,6 +27,7 @@ from PIL import Image, ImageEnhance from .imgaug_support import execute_imgaug from .ops import * from .box_utils import * +import paddlex.utils.logging as logging class DetTransform: @@ -152,6 +153,16 @@ class Compose(DetTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + class ResizeByShort(DetTransform): """根据图像的短边调整图像大小(resize)。 @@ -716,22 +727,38 @@ class MixupImage(DetTransform): 'Becasuse gt_bbox/gt_class/gt_score is not in label_info!') gt_bbox1 = label_info['gt_bbox'] gt_bbox2 = im_info['mixup'][2]['gt_bbox'] - gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) gt_class1 = label_info['gt_class'] gt_class2 = im_info['mixup'][2]['gt_class'] - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - gt_score1 = label_info['gt_score'] gt_score2 = im_info['mixup'][2]['gt_score'] - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) if 'gt_poly' in label_info: gt_poly1 = label_info['gt_poly'] gt_poly2 = im_info['mixup'][2]['gt_poly'] - label_info['gt_poly'] = gt_poly1 + gt_poly2 is_crowd1 = label_info['is_crowd'] is_crowd2 = im_info['mixup'][2]['is_crowd'] - is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + + if 0 not in gt_class1 and 0 not in gt_class2: + gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) + gt_class = np.concatenate((gt_class1, gt_class2), axis=0) + gt_score = np.concatenate( + (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + gt_poly2 + is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + elif 0 in gt_class1: + gt_bbox = gt_bbox2 + gt_class = gt_class2 + gt_score = gt_score2 * (1. 
- factor) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly2 + is_crowd = is_crowd2 + else: + gt_bbox = gt_bbox1 + gt_class = gt_class1 + gt_score = gt_score1 * factor + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + is_crowd = is_crowd1 label_info['gt_bbox'] = gt_bbox label_info['gt_score'] = gt_score label_info['gt_class'] = gt_class @@ -803,6 +830,8 @@ class RandomExpand(DetTransform): if np.random.uniform(0., 1.) < self.prob: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] height = int(image_shape[0]) width = int(image_shape[1]) @@ -898,6 +927,8 @@ class RandomCrop(DetTransform): if len(label_info['gt_bbox']) == 0: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] w = image_shape[1] @@ -1193,9 +1224,10 @@ class ArrangeYOLOv3(DetTransform): if gt_num > 0: label_info['gt_class'][:gt_num, 0] = label_info[ 'gt_class'][:gt_num, 0] - 1 - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] + if -1 not in label_info['gt_class']: + gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] + gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] + gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] # parse [x1, y1, x2, y2] to [x, y, w, h] gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. @@ -1227,3 +1259,108 @@ class ArrangeYOLOv3(DetTransform): im_shape = im_info['image_shape'] outputs = (im, im_shape) return outputs + + +class ComposedRCNNTransforms(Compose): + """ RCNN模型(faster-rcnn/mask-rcnn)图像处理流程,具体如下, + 训练阶段: + 1. 随机以0.5的概率将图像水平翻转 + 2. 图像归一化 + 3. 图像按比例Resize,scale计算方式如下 + scale = min_max_size[0] / short_size_of_image + if max_size_of_image * scale > min_max_size[1]: + scale = min_max_size[1] / max_size_of_image + 4. 将3步骤的长宽进行padding,使得长宽为32的倍数 + 验证阶段: + 1. 图像归一化 + 2. 图像按比例Resize,scale计算方式同上训练阶段 + 3. 将2步骤的长宽进行padding,使得长宽为32的倍数 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + min_max_size(list): 图像在缩放时,最小边和最大边的约束条件 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + min_max_size=[800, 1333], + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + else: + # 验证/预测时的transforms + transforms = [ + Normalize( + mean=mean, std=std), ResizeByShort( + short_size=min_max_size[0], max_size=min_max_size[1]), + Padding(coarsest_stride=32) + ] + + super(ComposedRCNNTransforms, self).__init__(transforms) + + +class ComposedYOLOv3Transforms(Compose): + """YOLOv3模型的图像预处理流程,具体如下, + 训练阶段: + 1. 在前mixup_epoch轮迭代中,使用MixupImage策略,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage + 2. 对图像进行随机扰动,包括亮度,对比度,饱和度和色调 + 3. 随机扩充图像,见https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand + 4. 随机裁剪图像 + 5. 将4步骤的输出图像Resize成shape参数的大小 + 6. 随机0.5的概率水平翻转图像 + 7. 图像归一化 + 验证/预测阶段: + 1. 将图像Resize成shape参数大小 + 2. 
图像归一化 + + Args: + mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小 + mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + shape=[608, 608], + mixup_epoch=250, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + width = shape + if isinstance(shape, list): + if shape[0] != shape[1]: + raise Exception( + "In YOLOv3 model, width and height should be equal") + width = shape[0] + if width % 32 != 0: + raise Exception( + "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...." + ) + + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + MixupImage(mixup_epoch=mixup_epoch), RandomDistort(), + RandomExpand(), RandomCrop(), Resize( + target_size=width, + interp='RANDOM'), RandomHorizontalFlip(), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [ + Resize( + target_size=width, interp='CUBIC'), Normalize( + mean=mean, std=std) + ] + super(ComposedYOLOv3Transforms, self).__init__(transforms) diff --git a/paddlex/cv/transforms/imgaug_support.py b/paddlex/cv/transforms/imgaug_support.py index 3924acd1821fc9fc4f85b8c486e4f2a20f4e8543..edaaba958d7501861ae36eac3dab8900af1ddb8f 100644 --- a/paddlex/cv/transforms/imgaug_support.py +++ b/paddlex/cv/transforms/imgaug_support.py @@ -23,7 +23,7 @@ def execute_imgaug(augmenter, im, bboxes=None, polygons=None, import imgaug.augmentables.bbs as bbs aug_im = im.astype('uint8') - aug_im = augmenter.augment(image=aug_im) + aug_im = augmenter.augment(image=aug_im).astype('float32') return aug_im # TODO imgaug的标注处理逻辑与paddlex已存的transform存在部分差异 diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py index e562ba2601677085fdef23c57a6779ba77143f8d..9ea1c3bdc2159dbc1f33ac5f15dc710e12ccb83c 100644 --- a/paddlex/cv/transforms/seg_transforms.py +++ b/paddlex/cv/transforms/seg_transforms.py @@ -21,6 +21,7 @@ import numpy as np from PIL import Image import cv2 from collections import OrderedDict +import paddlex.utils.logging as logging class SegTransform: @@ -108,6 +109,16 @@ class Compose(SegTransform): outputs = (im, im_info) return outputs + def add_augmenters(self, augmenters): + if not isinstance(augmenters, list): + raise Exception( + "augmenters should be list type in func add_augmenters()") + transform_names = [type(x).__name__ for x in self.transforms] + for aug in augmenters: + if type(aug).__name__ in transform_names: + logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + self.transforms = augmenters + self.transforms + class RandomHorizontalFlip(SegTransform): """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 @@ -1088,3 +1099,39 @@ class ArrangeSegmenter(SegTransform): return (im, im_info) else: return (im, ) + + +class ComposedSegTransforms(Compose): + """ 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下 + 训练阶段: + 1. 随机对图像以0.5的概率水平翻转 + 2. 按不同的比例随机Resize原图 + 3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小 + 4. 图像归一化 + 预测阶段: + 1. 
图像归一化 + + Args: + mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' + train_crop_size(list): 模型训练阶段,随机从原图crop的大小 + mean(list): 图像均值 + std(list): 图像方差 + """ + + def __init__(self, + mode, + train_crop_size=[769, 769], + mean=[0.5, 0.5, 0.5], + std=[0.5, 0.5, 0.5]): + if mode == 'train': + # 训练时的transforms,包含数据增强 + transforms = [ + RandomHorizontalFlip(prob=0.5), ResizeStepScaling(), + RandomPaddingCrop(crop_size=train_crop_size), Normalize( + mean=mean, std=std) + ] + else: + # 验证/预测时的transforms + transforms = [Normalize(mean=mean, std=std)] + + super(ComposedSegTransforms, self).__init__(transforms) diff --git a/paddlex/deploy.py b/paddlex/deploy.py index bb2618c1d844836a4884d93218f7d67434103b8e..0aee491ecdda1609b8827f94d0412a26bf053650 100644 --- a/paddlex/deploy.py +++ b/paddlex/deploy.py @@ -97,8 +97,6 @@ class Predictor: config.disable_glog_info() if memory_optimize: config.enable_memory_optim() - else: - config.diable_memory_optim() # 开启计算图分析优化,包括OP融合等 config.switch_ir_optim(True) diff --git a/paddlex/interpret/as_data_reader/data_path_utils.py b/paddlex/interpret/as_data_reader/data_path_utils.py index e0604f4f5dfc2a622659bb537046a92cd4c2ce61..1c915050bed935c4e7f6ea34be6a231f7c05f44c 100644 --- a/paddlex/interpret/as_data_reader/data_path_utils.py +++ b/paddlex/interpret/as_data_reader/data_path_utils.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os @@ -19,4 +19,4 @@ def _find_classes(dir): classes = [d.name for d in os.scandir(dir) if d.is_dir()] classes.sort() class_to_idx = {classes[i]: i for i in range(len(classes))} - return classes, class_to_idx \ No newline at end of file + return classes, class_to_idx diff --git a/paddlex/interpret/as_data_reader/readers.py b/paddlex/interpret/as_data_reader/readers.py index 5cd76c6400327637121b16ac585d4ac052ab74d7..4b551177334c1da6546a605f2cee00518d90c57a 100644 --- a/paddlex/interpret/as_data_reader/readers.py +++ b/paddlex/interpret/as_data_reader/readers.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys @@ -20,6 +20,7 @@ import six import glob from .data_path_utils import _find_classes from PIL import Image +import paddlex.utils.logging as logging def resize_short(img, target_size, interpolation=None): @@ -117,7 +118,7 @@ def read_image(img_path, target_size=256, crop_size=224): assert len(img_path.shape) == 4 return img_path else: - ValueError(f"Not recognized data type {type(img_path)}.") + raise ValueError("Unrecognized data type {}.".format(type(img_path))) class ReaderConfig(object): @@ -156,7 +157,7 @@ class ReaderConfig(object): img = cv2.imread(img_path) if img is None: - print(img_path) + logging.info("Failed to read image: {}".format(img_path)) continue img = resize_short(img, target_size, interpolation=None) img = crop_image(img, crop_size, center=self.is_test) @@ -208,7 +209,7 @@ def create_reader(list_image_path, list_label=None, is_test=False): img = cv2.imread(img_path) if img is None: - print(img_path) + logging.info("Failed to read image: {}".format(img_path)) continue img = resize_short(img, target_size, interpolation=None) @@ -222,4 +223,4 @@ def create_reader(list_image_path, list_label=None, is_test=False): yield img_show, img, label - return reader \ No newline at end of file + return reader diff --git a/paddlex/interpret/core/_session_preparation.py b/paddlex/interpret/core/_session_preparation.py index f75fa2464fe43969ec76c557c43344c0f2ae877f..81d3b22b216f07047b6a3a4c39701a03ec96a964 100644 --- a/paddlex/interpret/core/_session_preparation.py +++ b/paddlex/interpret/core/_session_preparation.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import os.path as osp @@ -20,6 +20,7 @@ import numpy as np from paddle.fluid.param_attr import ParamAttr from paddlex.interpret.as_data_reader.readers import preprocess_image + def gen_user_home(): if "HOME" in os.environ: home_path = os.environ["HOME"] @@ -34,10 +35,20 @@ def paddle_get_fc_weights(var_name="fc_0.w_0"): def paddle_resize(extracted_features, outsize): - resized_features = fluid.layers.resize_bilinear(extracted_features, outsize) + resized_features = fluid.layers.resize_bilinear(extracted_features, + outsize) return resized_features +def get_precomputed_normlime_weights(): + root_path = gen_user_home() + root_path = osp.join(root_path, '.paddlex') + h_pre_models = osp.join(root_path, "pre_models") + normlime_weights_file = osp.join( + h_pre_models, "normlime_weights_imagenet_resnet50vc.npy") + return np.load(normlime_weights_file, allow_pickle=True).item() + + def compute_features_for_kmeans(data_content): root_path = gen_user_home() root_path = osp.join(root_path, '.paddlex') @@ -47,6 +58,7 @@ def compute_features_for_kmeans(data_content): os.makedirs(root_path) url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" pdx.utils.download_and_decompress(url, path=root_path) + def conv_bn_layer(input, num_filters, filter_size, @@ -55,7 +67,7 @@ def compute_features_for_kmeans(data_content): act=None, name=None, is_test=True, - global_name=''): + global_name='for_kmeans_'): conv = fluid.layers.conv2d( input=input, num_filters=num_filters, @@ -79,14 +91,14 @@ def compute_features_for_kmeans(data_content): bias_attr=ParamAttr(global_name + bn_name + '_offset'), moving_mean_name=global_name + bn_name + '_mean', moving_variance_name=global_name + bn_name + '_variance', - use_global_stats=is_test - ) + use_global_stats=is_test) startup_prog = fluid.default_startup_program().clone(for_test=True) prog = fluid.Program() with fluid.program_guard(prog, startup_prog): with fluid.unique_name.guard(): - image_op = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32') + image_op = fluid.data( + name='image', shape=[None, 3, 224, 224], dtype='float32') conv = conv_bn_layer( input=image_op, @@ -110,7 +122,8 @@ def compute_features_for_kmeans(data_content): act='relu', name='conv1_3') extracted_features = conv - resized_features = fluid.layers.resize_bilinear(extracted_features, image_op.shape[2:]) + resized_features = fluid.layers.resize_bilinear(extracted_features, + image_op.shape[2:]) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) @@ -119,7 +132,10 @@ def compute_features_for_kmeans(data_content): exe.run(startup_prog) fluid.io.load_persistables(exe, h_pre_models, prog) - images = preprocess_image(data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0] - result = exe.run(prog, fetch_list=[resized_features], feed={'image': images}) + images = preprocess_image( + data_content) # transpose to [N, 3, H, W], scaled to [0.0, 1.0] + result = exe.run(prog, + fetch_list=[resized_features], + feed={'image': images}) return result[0][0] diff --git a/paddlex/interpret/core/interpretation.py 
b/paddlex/interpret/core/interpretation.py index 72d8c238a2e1817098eefcae18b0a3b56aedeb6b..ca3b1cf3371f244a1ab55e6940de2cd382fd7ab3 100644 --- a/paddlex/interpret/core/interpretation.py +++ b/paddlex/interpret/core/interpretation.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from .interpretation_algorithms import CAM, LIME, NormLIME from .normlime_base import precompute_normlime_weights @@ -20,12 +20,10 @@ class Interpretation(object): """ Base class for all interpretation algorithms. """ - def __init__(self, interpretation_algorithm_name, predict_fn, label_names, **kwargs): - supported_algorithms = { - 'cam': CAM, - 'lime': LIME, - 'normlime': NormLIME - } + + def __init__(self, interpretation_algorithm_name, predict_fn, label_names, + **kwargs): + supported_algorithms = {'cam': CAM, 'lime': LIME, 'normlime': NormLIME} self.algorithm_name = interpretation_algorithm_name.lower() assert self.algorithm_name in supported_algorithms.keys() @@ -33,19 +31,17 @@ class Interpretation(object): # initialization for the interpretation algorithm. self.algorithm = supported_algorithms[self.algorithm_name]( - self.predict_fn, label_names, **kwargs - ) + self.predict_fn, label_names, **kwargs) - def interpret(self, data_, visualization=True, save_to_disk=True, save_dir='./tmp'): + def interpret(self, data_, visualization=True, save_dir='./'): """ Args: data_: data_ can be a path or numpy.ndarray. visualization: whether to show using matplotlib. - save_to_disk: whether to save the figure in local disk. save_dir: dir to save figure if save_to_disk is True. Returns: """ - return self.algorithm.interpret(data_, visualization, save_to_disk, save_dir) + return self.algorithm.interpret(data_, visualization, save_dir) diff --git a/paddlex/interpret/core/interpretation_algorithms.py b/paddlex/interpret/core/interpretation_algorithms.py index 507e47bf372ab6ce81fbf9db4062cc5dae3a0b74..2805af601a91314a5d554511af04b53eef7b653a 100644 --- a/paddlex/interpret/core/interpretation_algorithms.py +++ b/paddlex/interpret/core/interpretation_algorithms.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import os.path as osp @@ -21,7 +21,7 @@ from . import lime_base from ._session_preparation import paddle_get_fc_weights, compute_features_for_kmeans, gen_user_home from .normlime_base import combine_normlime_and_lime, get_feature_for_kmeans, load_kmeans_model from paddlex.interpret.as_data_reader.readers import read_image - +import paddlex.utils.logging as logging import cv2 @@ -65,24 +65,27 @@ class CAM(object): fc_weights = paddle_get_fc_weights() feature_maps = result[1] - + l = pred_label[0] ln = l if self.label_names is not None: ln = self.label_names[l] - print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}') + prob_str = "%.3f" % (probability[pred_label[0]]) + logging.info("predicted result: {} with probability {}.".format( + ln, prob_str)) return feature_maps, fc_weights - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): feature_maps, fc_weights = self.preparation_cam(data_) - cam = get_cam(self.image, feature_maps, fc_weights, self.predicted_label) + cam = get_cam(self.image, feature_maps, fc_weights, + self.predicted_label) - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] - ln = l + ln = l if self.label_names is not None: ln = self.label_names[l] @@ -91,18 +94,19 @@ class CAM(object): ncols = 2 plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) axes[1].imshow(cam) axes[1].set_title("CAM") - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'cam') if visualization: @@ -112,7 +116,11 @@ class CAM(object): class LIME(object): - def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50): """ LIME wrapper. See lime_base.py for the detailed LIME implementation. 
Args: @@ -151,30 +159,37 @@ class LIME(object): self.predicted_probability = probability[pred_label[0]] self.image = image_show[0] self.labels = pred_label - + l = pred_label[0] ln = l if self.label_names is not None: ln = self.label_names[l] - - print(f'predicted result: {ln} with probability {probability[pred_label[0]]:.3f}') + + prob_str = "%.3f" % (probability[pred_label[0]]) + logging.info("predicted result: {} with probability {}.".format( + ln, prob_str)) end = time.time() algo = lime_base.LimeImageInterpreter() - interpreter = algo.interpret_instance(self.image, self.predict_fn, self.labels, 0, - num_samples=self.num_samples, batch_size=self.batch_size) + interpreter = algo.interpret_instance( + self.image, + self.predict_fn, + self.labels, + 0, + num_samples=self.num_samples, + batch_size=self.batch_size) self.lime_interpreter = interpreter - print('lime time: ', time.time() - end, 's.') + logging.info('lime time: ' + str(time.time() - end) + 's.') - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): if self.lime_interpreter is None: self.preparation_lime(data_) - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] - ln = l + ln = l if self.label_names is not None: ln = self.label_names[l] @@ -184,27 +199,33 @@ class LIME(object): ncols = len(weights_choices) plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) - axes[1].imshow(mark_boundaries(self.image, self.lime_interpreter.segments)) + axes[1].imshow( + mark_boundaries(self.image, self.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") # LIME visualization for i, w in enumerate(weights_choices): - num_to_show = auto_choose_num_features_to_show(self.lime_interpreter, l, w) + num_to_show = auto_choose_num_features_to_show( + self.lime_interpreter, l, w) temp, mask = self.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title(f"label {ln}, first {num_to_show} superpixels") + axes[ncols + i].set_title( + "label {}, first {} superpixels".format(ln, num_to_show)) - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'lime', self.num_samples) if visualization: @@ -213,9 +234,196 @@ class LIME(object): return +class NormLIMEStandard(object): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50, + kmeans_model_for_normlime=None, + normlime_weights=None): + root_path = gen_user_home() + root_path = osp.join(root_path, '.paddlex') + h_pre_models = osp.join(root_path, "pre_models") + if not osp.exists(h_pre_models): + if not osp.exists(root_path): + os.makedirs(root_path) + url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz" + 
pdx.utils.download_and_decompress(url, path=root_path) + h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl") + if kmeans_model_for_normlime is None: + try: + self.kmeans_model = load_kmeans_model(h_pre_models_kmeans) + except: + raise ValueError( + "NormLIME needs the KMeans model, where we provided a default one in " + "pre_models/kmeans_model.pkl.") + else: + logging.debug("Warning: It is *strongly* suggested to use the \ + default KMeans model in pre_models/kmeans_model.pkl. \ + Use another one will change the final result.") + self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime) + + self.num_samples = num_samples + self.batch_size = batch_size + + try: + self.normlime_weights = np.load( + normlime_weights, allow_pickle=True).item() + except: + self.normlime_weights = None + logging.debug( + "Warning: not find the correct precomputed Normlime result.") + + self.predict_fn = predict_fn + + self.labels = None + self.image = None + self.label_names = label_names + + def predict_cluster_labels(self, feature_map, segments): + X = get_feature_for_kmeans(feature_map, segments) + try: + cluster_labels = self.kmeans_model.predict(X) + except AttributeError: + from sklearn.metrics import pairwise_distances_argmin_min + cluster_labels, _ = pairwise_distances_argmin_min( + X, self.kmeans_model.cluster_centers_) + return cluster_labels + + def predict_using_normlime_weights(self, pred_labels, + predicted_cluster_labels): + # global weights + g_weights = {y: [] for y in pred_labels} + for y in pred_labels: + cluster_weights_y = self.normlime_weights.get(y, {}) + g_weights[y] = [(i, cluster_weights_y.get(k, 0.0)) + for i, k in enumerate(predicted_cluster_labels)] + + g_weights[y] = sorted( + g_weights[y], key=lambda x: np.abs(x[1]), reverse=True) + + return g_weights + + def preparation_normlime(self, data_): + self._lime = LIME(self.predict_fn, self.label_names, self.num_samples, + self.batch_size) + self._lime.preparation_lime(data_) + + image_show = read_image(data_) + + self.predicted_label = self._lime.predicted_label + self.predicted_probability = self._lime.predicted_probability + self.image = image_show[0] + self.labels = self._lime.labels + logging.info('performing NormLIME operations ...') + + cluster_labels = self.predict_cluster_labels( + compute_features_for_kmeans(image_show).transpose((1, 2, 0)), + self._lime.lime_interpreter.segments) + + g_weights = self.predict_using_normlime_weights(self.labels, + cluster_labels) + + return g_weights + + def interpret(self, data_, visualization=True, save_outdir=None): + if self.normlime_weights is None: + raise ValueError( + "Not find the correct precomputed NormLIME result. \n" + "\t Try to call compute_normlime_weights() first or load the correct path." 
+ ) + + g_weights = self.preparation_normlime(data_) + lime_weights = self._lime.lime_interpreter.local_weights + + if visualization or save_outdir is not None: + import matplotlib.pyplot as plt + from skimage.segmentation import mark_boundaries + l = self.labels[0] + ln = l + if self.label_names is not None: + ln = self.label_names[l] + + psize = 5 + nrows = 4 + weights_choices = [0.6, 0.7, 0.75, 0.8, 0.85] + nums_to_show = [] + ncols = len(weights_choices) + + plt.close() + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) + for ax in axes.ravel(): + ax.axis("off") + + axes = axes.ravel() + axes[0].imshow(self.image) + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) + + axes[1].imshow( + mark_boundaries(self.image, + self._lime.lime_interpreter.segments)) + axes[1].set_title("superpixel segmentation") + + # LIME visualization + for i, w in enumerate(weights_choices): + num_to_show = auto_choose_num_features_to_show( + self._lime.lime_interpreter, l, w) + nums_to_show.append(num_to_show) + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols + i].imshow(mark_boundaries(temp, mask)) + axes[ncols + i].set_title("LIME: first {} superpixels".format( + num_to_show)) + + # NormLIME visualization + self._lime.lime_interpreter.local_weights = g_weights + for i, num_to_show in enumerate(nums_to_show): + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask)) + axes[ncols * 2 + i].set_title( + "NormLIME: first {} superpixels".format(num_to_show)) + + # NormLIME*LIME visualization + combined_weights = combine_normlime_and_lime(lime_weights, + g_weights) + self._lime.lime_interpreter.local_weights = combined_weights + for i, num_to_show in enumerate(nums_to_show): + temp, mask = self._lime.lime_interpreter.get_image_and_mask( + l, + positive_only=False, + hide_rest=False, + num_features=num_to_show) + axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask)) + axes[ncols * 3 + i].set_title( + "Combined: first {} superpixels".format(num_to_show)) + + self._lime.lime_interpreter.local_weights = lime_weights + + if save_outdir is not None: + save_fig(data_, save_outdir, 'normlime', self.num_samples) + + if visualization: + plt.show() + + class NormLIME(object): - def __init__(self, predict_fn, label_names, num_samples=3000, batch_size=50, - kmeans_model_for_normlime=None, normlime_weights=None): + def __init__(self, + predict_fn, + label_names, + num_samples=3000, + batch_size=50, + kmeans_model_for_normlime=None, + normlime_weights=None): root_path = gen_user_home() root_path = osp.join(root_path, '.paddlex') h_pre_models = osp.join(root_path, "pre_models") @@ -229,21 +437,25 @@ class NormLIME(object): try: self.kmeans_model = load_kmeans_model(h_pre_models_kmeans) except: - raise ValueError("NormLIME needs the KMeans model, where we provided a default one in " - "pre_models/kmeans_model.pkl.") + raise ValueError( + "NormLIME needs the KMeans model, where we provided a default one in " + "pre_models/kmeans_model.pkl.") else: - print("Warning: It is *strongly* suggested to use the default KMeans model in pre_models/kmeans_model.pkl. 
" - "Use another one will change the final result.") + logging.debug("Warning: It is *strongly* suggested to use the \ + default KMeans model in pre_models/kmeans_model.pkl. \ + Use another one will change the final result.") self.kmeans_model = load_kmeans_model(kmeans_model_for_normlime) self.num_samples = num_samples self.batch_size = batch_size try: - self.normlime_weights = np.load(normlime_weights, allow_pickle=True).item() + self.normlime_weights = np.load( + normlime_weights, allow_pickle=True).item() except: self.normlime_weights = None - print("Warning: not find the correct precomputed Normlime result.") + logging.debug( + "Warning: not find the correct precomputed Normlime result.") self.predict_fn = predict_fn @@ -257,30 +469,27 @@ class NormLIME(object): cluster_labels = self.kmeans_model.predict(X) except AttributeError: from sklearn.metrics import pairwise_distances_argmin_min - cluster_labels, _ = pairwise_distances_argmin_min(X, self.kmeans_model.cluster_centers_) + cluster_labels, _ = pairwise_distances_argmin_min( + X, self.kmeans_model.cluster_centers_) return cluster_labels - def predict_using_normlime_weights(self, pred_labels, predicted_cluster_labels): + def predict_using_normlime_weights(self, pred_labels, + predicted_cluster_labels): # global weights g_weights = {y: [] for y in pred_labels} for y in pred_labels: cluster_weights_y = self.normlime_weights.get(y, {}) - g_weights[y] = [ - (i, cluster_weights_y.get(k, 0.0)) for i, k in enumerate(predicted_cluster_labels) - ] + g_weights[y] = [(i, cluster_weights_y.get(k, 0.0)) + for i, k in enumerate(predicted_cluster_labels)] - g_weights[y] = sorted(g_weights[y], - key=lambda x: np.abs(x[1]), reverse=True) + g_weights[y] = sorted( + g_weights[y], key=lambda x: np.abs(x[1]), reverse=True) return g_weights def preparation_normlime(self, data_): - self._lime = LIME( - self.predict_fn, - self.label_names, - self.num_samples, - self.batch_size - ) + self._lime = LIME(self.predict_fn, self.label_names, self.num_samples, + self.batch_size) self._lime.preparation_lime(data_) image_show = read_image(data_) @@ -289,26 +498,28 @@ class NormLIME(object): self.predicted_probability = self._lime.predicted_probability self.image = image_show[0] self.labels = self._lime.labels - # print(f'predicted result: {self.predicted_label} with probability {self.predicted_probability: .3f}') - print('performing NormLIME operations ...') + logging.info('performing NormLIME operations ...') cluster_labels = self.predict_cluster_labels( - compute_features_for_kmeans(image_show).transpose((1, 2, 0)), self._lime.lime_interpreter.segments - ) + compute_features_for_kmeans(image_show).transpose((1, 2, 0)), + self._lime.lime_interpreter.segments) - g_weights = self.predict_using_normlime_weights(self.labels, cluster_labels) + g_weights = self.predict_using_normlime_weights(self.labels, + cluster_labels) return g_weights - def interpret(self, data_, visualization=True, save_to_disk=True, save_outdir=None): + def interpret(self, data_, visualization=True, save_outdir=None): if self.normlime_weights is None: - raise ValueError("Not find the correct precomputed NormLIME result. \n" - "\t Try to call compute_normlime_weights() first or load the correct path.") + raise ValueError( + "Not find the correct precomputed NormLIME result. \n" + "\t Try to call compute_normlime_weights() first or load the correct path." 
+ ) g_weights = self.preparation_normlime(data_) lime_weights = self._lime.lime_interpreter.local_weights - if visualization or save_to_disk: + if visualization or save_outdir is not None: import matplotlib.pyplot as plt from skimage.segmentation import mark_boundaries l = self.labels[0] @@ -323,72 +534,93 @@ class NormLIME(object): ncols = len(weights_choices) plt.close() - f, axes = plt.subplots(nrows, ncols, figsize=(psize * ncols, psize * nrows)) + f, axes = plt.subplots( + nrows, ncols, figsize=(psize * ncols, psize * nrows)) for ax in axes.ravel(): ax.axis("off") axes = axes.ravel() axes[0].imshow(self.image) - axes[0].set_title(f"label {ln}, proba: {self.predicted_probability: .3f}") + prob_str = "{%.3f}" % (self.predicted_probability) + axes[0].set_title("label {}, proba: {}".format(ln, prob_str)) - axes[1].imshow(mark_boundaries(self.image, self._lime.lime_interpreter.segments)) + axes[1].imshow( + mark_boundaries(self.image, + self._lime.lime_interpreter.segments)) axes[1].set_title("superpixel segmentation") # LIME visualization for i, w in enumerate(weights_choices): - num_to_show = auto_choose_num_features_to_show(self._lime.lime_interpreter, l, w) + num_to_show = auto_choose_num_features_to_show( + self._lime.lime_interpreter, l, w) nums_to_show.append(num_to_show) temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols + i].imshow(mark_boundaries(temp, mask)) - axes[ncols + i].set_title(f"LIME: first {num_to_show} superpixels") + axes[ncols + i].set_title("LIME: first {} superpixels".format( + num_to_show)) # NormLIME visualization self._lime.lime_interpreter.local_weights = g_weights for i, num_to_show in enumerate(nums_to_show): temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols * 2 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 2 + i].set_title(f"NormLIME: first {num_to_show} superpixels") + axes[ncols * 2 + i].set_title( + "NormLIME: first {} superpixels".format(num_to_show)) # NormLIME*LIME visualization - combined_weights = combine_normlime_and_lime(lime_weights, g_weights) + combined_weights = combine_normlime_and_lime(lime_weights, + g_weights) + self._lime.lime_interpreter.local_weights = combined_weights for i, num_to_show in enumerate(nums_to_show): temp, mask = self._lime.lime_interpreter.get_image_and_mask( - l, positive_only=False, hide_rest=False, num_features=num_to_show - ) + l, + positive_only=True, + hide_rest=False, + num_features=num_to_show) axes[ncols * 3 + i].imshow(mark_boundaries(temp, mask)) - axes[ncols * 3 + i].set_title(f"Combined: first {num_to_show} superpixels") + axes[ncols * 3 + i].set_title( + "Combined: first {} superpixels".format(num_to_show)) self._lime.lime_interpreter.local_weights = lime_weights - if save_to_disk and save_outdir is not None: - os.makedirs(save_outdir, exist_ok=True) + if save_outdir is not None: save_fig(data_, save_outdir, 'normlime', self.num_samples) if visualization: plt.show() -def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show): +def auto_choose_num_features_to_show(lime_interpreter, label, + percentage_to_show): segments = lime_interpreter.segments lime_weights = lime_interpreter.local_weights[label] - num_pixels_threshold_in_a_sp = 
-def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show):
+def auto_choose_num_features_to_show(lime_interpreter, label,
+                                     percentage_to_show):
    segments = lime_interpreter.segments
    lime_weights = lime_interpreter.local_weights[label]
-    num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[1] // len(np.unique(segments)) // 8
+    num_pixels_threshold_in_a_sp = segments.shape[0] * segments.shape[
+        1] // len(np.unique(segments)) // 8

    # l1 norm with filtered weights.
-    used_weights = [(tuple_w[0], tuple_w[1]) for i, tuple_w in enumerate(lime_weights) if tuple_w[1] > 0]
+    used_weights = [(tuple_w[0], tuple_w[1])
+                    for i, tuple_w in enumerate(lime_weights)
+                    if tuple_w[1] > 0]
    norm = np.sum([tuple_w[1] for i, tuple_w in enumerate(used_weights)])
-    normalized_weights = [(tuple_w[0], tuple_w[1] / norm) for i, tuple_w in enumerate(lime_weights)]
+    normalized_weights = [(tuple_w[0], tuple_w[1] / norm)
+                          for i, tuple_w in enumerate(lime_weights)]

    a = 0.0
    n = 0
    for i, tuple_w in enumerate(normalized_weights):
        if tuple_w[1] < 0:
            continue
-        if len(np.where(segments == tuple_w[0])[0]) < num_pixels_threshold_in_a_sp:
+        if len(np.where(segments == tuple_w[0])[
+                0]) < num_pixels_threshold_in_a_sp:
            continue

        a += tuple_w[1]
@@ -400,12 +632,18 @@ def auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show
        return 5

    if n == 0:
-        return auto_choose_num_features_to_show(lime_interpreter, label, percentage_to_show-0.1)
+        return auto_choose_num_features_to_show(lime_interpreter, label,
+                                                percentage_to_show - 0.1)

    return n


-def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam_max=None):
+def get_cam(image_show,
+            feature_maps,
+            fc_weights,
+            label_index,
+            cam_min=None,
+            cam_max=None):
    _, nc, h, w = feature_maps.shape

    cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
@@ -419,7 +657,8 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam
    cam = cam - cam_min
    cam = cam / cam_max
    cam = np.uint8(255 * cam)
-    cam_img = cv2.resize(cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)
+    cam_img = cv2.resize(
+        cam, image_show.shape[0:2], interpolation=cv2.INTER_LINEAR)

    heatmap = cv2.applyColorMap(np.uint8(255 * cam_img), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap)
@@ -431,34 +670,11 @@ def get_cam(image_show, feature_maps, fc_weights, label_index, cam_min=None, cam

def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
    import matplotlib.pyplot as plt
-    if isinstance(data_, str):
-        if algorithm_name == 'cam':
-            f_out = f"{algorithm_name}_{data_.split('/')[-1]}.png"
-        else:
-            f_out = f"{algorithm_name}_{data_.split('/')[-1]}_s{num_samples}.png"
-        plt.savefig(
-            os.path.join(save_outdir, f_out)
-        )
+    if algorithm_name == 'cam':
+        f_out = "{}_{}.png".format(algorithm_name, data_.split('/')[-1])
    else:
-        n = 0
-        if algorithm_name == 'cam':
-            f_out = f'cam-{n}.png'
-        else:
-            f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
-        while os.path.exists(
-            os.path.join(save_outdir, f_out)
-        ):
-            n += 1
-            if algorithm_name == 'cam':
-                f_out = f'cam-{n}.png'
-            else:
-                f_out = f'{algorithm_name}_s{num_samples}-{n}.png'
-            continue
-        plt.savefig(
-            os.path.join(
-                save_outdir, f_out
-            )
-        )
-    print('The image of intrepretation result save in {}'.format(os.path.join(
-        save_outdir, f_out
-    )))
+        f_out = "{}_{}_s{}.png".format(save_outdir, algorithm_name,
+                                       num_samples)
+
+    plt.savefig(f_out)
+    logging.info(
+        'The image of interpretation result is saved in {}'.format(f_out))
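For reference, get_cam above computes a class activation map: the last conv layer's feature maps are weighted by the fc-layer column of the target class, reduced over channels, min-max normalized, then resized and color-mapped with cv2. A small sketch under that reading; the channel reduction step is an assumption, since that line falls outside the quoted hunks:

import numpy as np

def cam_sketch(feature_maps, fc_weights, label_index):
    # feature_maps: (1, C, H, W) activations of the last conv layer.
    # fc_weights:   (C, num_classes) weights of the final fc layer.
    _, nc, h, w = feature_maps.shape
    cam = feature_maps * fc_weights[:, label_index].reshape(1, nc, 1, 1)
    cam = cam.sum(axis=(0, 1))  # channel reduction (assumed; elided in the hunk)
    cam = cam - cam.min()       # min-max normalize to [0, 1]
    cam = cam / cam.max()
    return np.uint8(255 * cam)  # ready for cv2.resize and a JET colormap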
diff --git a/paddlex/interpret/core/lime_base.py b/paddlex/interpret/core/lime_base.py
index 23969b91fc29a1324fff1a48a41de3b7c2450d8c..d7b44016ae41656c41db25572133e5a6cfc57675 100644
--- a/paddlex/interpret/core/lime_base.py
+++ b/paddlex/interpret/core/lime_base.py
@@ -27,21 +27,19 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The code in this file (lime_base.py) is modified from https://github.com/marcotcr/lime.
"""
-
import numpy as np
import scipy as sp
import tqdm
import copy
from functools import partial
+import paddlex.utils.logging as logging


class LimeBase(object):
    """Class for learning a locally linear sparse model from perturbed data"""
-    def __init__(self,
-                 kernel_fn,
-                 verbose=False,
-                 random_state=None):
+
+    def __init__(self, kernel_fn, verbose=False, random_state=None):
        """Init function

        Args:
@@ -71,15 +69,14 @@ class LimeBase(object):
        """
        from sklearn.linear_model import lars_path
        x_vector = weighted_data
-        alphas, _, coefs = lars_path(x_vector,
-                                     weighted_labels,
-                                     method='lasso',
-                                     verbose=False)
+        alphas, _, coefs = lars_path(
+            x_vector, weighted_labels, method='lasso', verbose=False)
        return alphas, coefs

    def forward_selection(self, data, labels, weights, num_features):
        """Iteratively adds features to the model"""
-        clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state)
+        clf = Ridge(
+            alpha=0, fit_intercept=True, random_state=self.random_state)
        used_features = []
        for _ in range(min(num_features, data.shape[1])):
            max_ = -100000000
@@ -87,11 +84,13 @@ class LimeBase(object):
            for feature in range(data.shape[1]):
                if feature in used_features:
                    continue
-                clf.fit(data[:, used_features + [feature]], labels,
+                clf.fit(data[:, used_features + [feature]],
+                        labels,
                        sample_weight=weights)
-                score = clf.score(data[:, used_features + [feature]],
-                                  labels,
-                                  sample_weight=weights)
+                score = clf.score(
+                    data[:, used_features + [feature]],
+                    labels,
+                    sample_weight=weights)
                if score > max_:
                    best = feature
                    max_ = score
@@ -107,8 +106,8 @@ class LimeBase(object):
        elif method == 'forward_selection':
            return self.forward_selection(data, labels, weights, num_features)
        elif method == 'highest_weights':
-            clf = Ridge(alpha=0.01, fit_intercept=True,
-                        random_state=self.random_state)
+            clf = Ridge(
+                alpha=0.01, fit_intercept=True, random_state=self.random_state)
            clf.fit(data, labels, sample_weight=weights)

            coef = clf.coef_
@@ -124,7 +123,8 @@ class LimeBase(object):
                    nnz_indexes = argsort_data[::-1]
                    indices = weighted_data.indices[nnz_indexes]
                    num_to_pad = num_features - sdata
-                    indices = np.concatenate((indices, np.zeros(num_to_pad, dtype=indices.dtype)))
+                    indices = np.concatenate((indices, np.zeros(
+                        num_to_pad, dtype=indices.dtype)))
                    indices_set = set(indices)
                    pad_counter = 0
                    for i in range(data.shape[1]):
@@ -134,7 +134,8 @@ class LimeBase(object):
                            if pad_counter >= num_to_pad:
                                break
                else:
-                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
+                    nnz_indexes = argsort_data[sdata - num_features:sdata][::
+                                                                           -1]
                    indices = weighted_data.indices[nnz_indexes]
                return indices
            else:
@@ -145,13 +146,13 @@ class LimeBase(object):
                                         reverse=True)
            return np.array([x[0] for x in feature_weights[:num_features]])
        elif method == 'lasso_path':
-            weighted_data = ((data - np.average(data, axis=0, weights=weights))
-                             * np.sqrt(weights[:, np.newaxis]))
-            weighted_labels = ((labels - np.average(labels, weights=weights))
-                               * np.sqrt(weights))
+            weighted_data = ((data - np.average(
+                data, axis=0, weights=weights)) *
+                             np.sqrt(weights[:, np.newaxis]))
+            weighted_labels = ((labels - np.average(
+                labels, weights=weights)) * np.sqrt(weights))
            nonzero = range(weighted_data.shape[1])
-            _, coefs = self.generate_lars_path(weighted_data,
-                                               weighted_labels)
+            _, coefs = self.generate_lars_path(weighted_data, weighted_labels)
            for i in range(len(coefs.T) - 1, 0, -1):
                nonzero = coefs.T[i].nonzero()[0]
                if len(nonzero) <= num_features:
@@ -163,8 +164,8 @@ class LimeBase(object):
                n_method = 'forward_selection'
            else:
                n_method = 'highest_weights'
-            return self.feature_selection(data, labels, weights,
-                                          num_features, n_method)
+            return self.feature_selection(data, labels, weights, num_features,
+                                          n_method)

    def interpret_instance_with_data(self,
                                     neighborhood_data,
@@ -213,30 +214,31 @@ class LimeBase(object):
        weights = self.kernel_fn(distances)
        labels_column = neighborhood_labels[:, label]
        used_features = self.feature_selection(neighborhood_data,
-                                               labels_column,
-                                               weights,
-                                               num_features,
-                                               feature_selection)
+                                               labels_column, weights,
+                                               num_features, feature_selection)
        if model_regressor is None:
-            model_regressor = Ridge(alpha=1, fit_intercept=True,
-                                    random_state=self.random_state)
+            model_regressor = Ridge(
+                alpha=1, fit_intercept=True, random_state=self.random_state)
        easy_model = model_regressor
        easy_model.fit(neighborhood_data[:, used_features],
-                       labels_column, sample_weight=weights)
+                       labels_column,
+                       sample_weight=weights)
        prediction_score = easy_model.score(
            neighborhood_data[:, used_features],
-            labels_column, sample_weight=weights)
+            labels_column,
+            sample_weight=weights)

-        local_pred = easy_model.predict(neighborhood_data[0, used_features].reshape(1, -1))
+        local_pred = easy_model.predict(neighborhood_data[0, used_features]
+                                        .reshape(1, -1))

        if self.verbose:
-            print('Intercept', easy_model.intercept_)
-            print('Prediction_local', local_pred,)
-            print('Right:', neighborhood_labels[0, label])
-        return (easy_model.intercept_,
-                sorted(zip(used_features, easy_model.coef_),
-                       key=lambda x: np.abs(x[1]), reverse=True),
-                prediction_score, local_pred)
+            logging.info('Intercept' + str(easy_model.intercept_))
+            logging.info('Prediction_local' + str(local_pred))
+            logging.info('Right:' + str(neighborhood_labels[0, label]))
+        return (easy_model.intercept_, sorted(
+            zip(used_features, easy_model.coef_),
+            key=lambda x: np.abs(x[1]),
+            reverse=True), prediction_score, local_pred)


class ImageInterpretation(object):
@@ -253,8 +255,13 @@ class ImageInterpretation(object):
        self.local_weights = {}
        self.local_pred = None

-    def get_image_and_mask(self, label, positive_only=True, negative_only=False, hide_rest=False,
-                           num_features=5, min_weight=0.):
+    def get_image_and_mask(self,
+                           label,
+                           positive_only=True,
+                           negative_only=False,
+                           hide_rest=False,
+                           num_features=5,
+                           min_weight=0.):
        """Init function.

        Args:
@@ -278,7 +285,9 @@ class ImageInterpretation(object):
        if label not in self.local_weights:
            raise KeyError('Label not in interpretation')
        if positive_only & negative_only:
-            raise ValueError("Positive_only and negative_only cannot be true at the same time.")
+            raise ValueError(
+                "Positive_only and negative_only cannot be true at the same time."
+            )
        segments = self.segments
        image = self.image
        local_weights_label = self.local_weights[label]
@@ -288,14 +297,20 @@ class ImageInterpretation(object):
        else:
            temp = self.image.copy()
        if positive_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] > 0 and x[1] > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] > 0 and x[1] > min_weight
+            ][:num_features]
        if negative_only:
-            fs = [x[0] for x in local_weights_label
-                  if x[1] < 0 and abs(x[1]) > min_weight][:num_features]
+            fs = [
+                x[0] for x in local_weights_label
+                if x[1] < 0 and abs(x[1]) > min_weight
+            ][:num_features]
        if positive_only or negative_only:
+            c = 1 if positive_only else 0
            for f in fs:
-                temp[segments == f] = image[segments == f].copy()
+                temp[segments == f] = [0, 255, 0]
+                # temp[segments == f, c] = np.max(image)
                mask[segments == f] = 1
            return temp, mask
        else:
@@ -329,8 +344,11 @@ class ImageInterpretation(object):
        temp = np.zeros_like(image)

        weight_max = abs(local_weights_label[0][1])
-        local_weights_label = [(f, w/weight_max) for f, w in local_weights_label]
-        local_weights_label = sorted(local_weights_label, key=lambda x: x[1], reverse=True)  # negatives are at last.
+        local_weights_label = [(f, w / weight_max)
+                               for f, w in local_weights_label]
+        local_weights_label = sorted(
+            local_weights_label, key=lambda x: x[1],
+            reverse=True)  # negatives are at last.

        cmaps = cm.get_cmap('Spectral')
        colors = cmaps(np.linspace(0, 1, len(local_weights_label)))
@@ -353,8 +371,12 @@ class LimeImageInterpreter(object):
        feature that is 1 when the value is the same as the instance
        being interpreted."""

-    def __init__(self, kernel_width=.25, kernel=None, verbose=False,
-                 feature_selection='auto', random_state=None):
+    def __init__(self,
+                 kernel_width=.25,
+                 kernel=None,
+                 verbose=False,
+                 feature_selection='auto',
+                 random_state=None):
        """Init function.

        Args:
@@ -376,22 +398,27 @@ class LimeImageInterpreter(object):
        kernel_width = float(kernel_width)

        if kernel is None:
+
            def kernel(d, kernel_width):
-                return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))
+                return np.sqrt(np.exp(-(d**2) / kernel_width**2))

        kernel_fn = partial(kernel, kernel_width=kernel_width)

        self.random_state = check_random_state(random_state)
        self.feature_selection = feature_selection
-        self.base = LimeBase(kernel_fn, verbose, random_state=self.random_state)
+        self.base = LimeBase(
+            kernel_fn, verbose, random_state=self.random_state)

-    def interpret_instance(self, image, classifier_fn, labels=(1,),
+    def interpret_instance(self,
+                           image,
+                           classifier_fn,
+                           labels=(1, ),
                           hide_color=None,
-                           num_features=100000, num_samples=1000,
+                           num_features=100000,
+                           num_samples=1000,
                           batch_size=10,
                           distance_metric='cosine',
-                           model_regressor=None
-                           ):
+                           model_regressor=None):
        """Generates interpretations for a prediction.

        First, we generate neighborhood data by randomly perturbing features
@@ -434,6 +461,7 @@ class LimeImageInterpreter(object):
        self.segments = segments

        fudged_image = image.copy()
+        # global_mean = np.mean(image, (0, 1))
        if hide_color is None:
            # if no hide_color, use the mean
            for x in np.unique(segments):
@@ -451,7 +479,6 @@ class LimeImageInterpreter(object):
            d = cdist(centroids, centroids, 'sqeuclidean')

            for x in np.unique(segments):
-                # print(np.argmin(d[x]))
                a = [image[segments == i] for i in np.argsort(d[x])[1:6]]
                mx = np.mean(np.concatenate(a), axis=0)
                fudged_image[segments == x] = mx
@@ -461,24 +488,30 @@ class LimeImageInterpreter(object):

        top = labels

-        data, labels = self.data_labels(image, fudged_image, segments,
-                                        classifier_fn, num_samples,
-                                        batch_size=batch_size)
+        data, labels = self.data_labels(
+            image,
+            fudged_image,
+            segments,
+            classifier_fn,
+            num_samples,
+            batch_size=batch_size)

        distances = sklearn.metrics.pairwise_distances(
-            data,
-            data[0].reshape(1, -1),
-            metric=distance_metric
-        ).ravel()
+            data, data[0].reshape(1, -1), metric=distance_metric).ravel()

        interpretation_image = ImageInterpretation(image, segments)

        for label in top:
            (interpretation_image.intercept[label],
             interpretation_image.local_weights[label],
-             interpretation_image.score, interpretation_image.local_pred) = self.base.interpret_instance_with_data(
-                data, labels, distances, label, num_features,
-                model_regressor=model_regressor,
-                feature_selection=self.feature_selection)
+             interpretation_image.score, interpretation_image.local_pred
+             ) = self.base.interpret_instance_with_data(
+                 data,
+                 labels,
+                 distances,
+                 label,
+                 num_features,
+                 model_regressor=model_regressor,
+                 feature_selection=self.feature_selection)

        return interpretation_image

    def data_labels(self,
@@ -511,6 +544,9 @@ class LimeImageInterpreter(object):
        labels = []
        data[0, :] = 1
        imgs = []
+
+        logging.info("Computing LIME.", use_color=True)
+
        for row in tqdm.tqdm(data):
            temp = copy.deepcopy(image)
            zeros = np.where(row == 0)[0]
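Taken together, these hunks implement LIME's local fit: each perturbed sample is weighted by the kernel sqrt(exp(-d^2 / width^2)) over its cosine distance to the unperturbed sample, and a Ridge model is fit with those sample weights to produce per-superpixel coefficients. A condensed, runnable sketch of that core under the defaults visible above (kernel_width=0.25, Ridge alpha=1); the function name is illustrative, not the module's API:

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import pairwise_distances

def fit_local_surrogate_sketch(neighborhood_data, labels_column,
                               kernel_width=0.25):
    # Cosine distance of every perturbed sample to the original (row 0).
    distances = pairwise_distances(
        neighborhood_data, neighborhood_data[0].reshape(1, -1),
        metric='cosine').ravel()
    # Proximity kernel from lime_base: closer samples weigh more.
    weights = np.sqrt(np.exp(-(distances**2) / kernel_width**2))
    # Locally weighted ridge regression, as in interpret_instance_with_data.
    model = Ridge(alpha=1, fit_intercept=True)
    model.fit(neighborhood_data, labels_column, sample_weight=weights)
    return model.coef_  # one local weight per superpixel feature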
diff --git a/paddlex/interpret/core/normlime_base.py b/paddlex/interpret/core/normlime_base.py
index ca7b79c31cff02c018838d31e337b9c65762a7f6..8270099b17c858688903354bffcfa412ed8c804c 100644
--- a/paddlex/interpret/core/normlime_base.py
+++ b/paddlex/interpret/core/normlime_base.py
@@ -1,26 +1,28 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

import os
import os.path as osp
import numpy as np
import glob
+import tqdm

from paddlex.interpret.as_data_reader.readers import read_image
import paddlex.utils.logging as logging
from . import lime_base
from ._session_preparation import compute_features_for_kmeans, gen_user_home


def load_kmeans_model(fname):
@@ -37,18 +39,24 @@ def combine_normlime_and_lime(lime_weights, g_weights):
    for y in pred_labels:
        normlized_lime_weights_y = lime_weights[y]
-        lime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_lime_weights_y}
+        lime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_lime_weights_y
+        }

        normlized_g_weight_y = g_weights[y]
-        normlime_weights_dict = {tuple_w[0]: tuple_w[1] for tuple_w in normlized_g_weight_y}
+        normlime_weights_dict = {
+            tuple_w[0]: tuple_w[1]
+            for tuple_w in normlized_g_weight_y
+        }

        combined_weights[y] = [
            (seg_k, lime_weights_dict[seg_k] * normlime_weights_dict[seg_k])
            for seg_k in lime_weights_dict.keys()
        ]

-        combined_weights[y] = sorted(combined_weights[y],
-                                     key=lambda x: np.abs(x[1]), reverse=True)
+        combined_weights[y] = sorted(
+            combined_weights[y], key=lambda x: np.abs(x[1]), reverse=True)

    return combined_weights

@@ -66,8 +74,8 @@ def centroid_using_superpixels(features, segments):
    regions = regionprops(segments + 1)
    one_list = np.zeros((len(np.unique(segments)), features.shape[2]))
    for i, r in enumerate(regions):
-        one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] + 0.5), :]
-    # print(one_list.shape)
+        one_list[i] = features[int(r.centroid[0] + 0.5), int(r.centroid[1] +
+                                                             0.5), :]
    return one_list

@@ -80,30 +88,39 @@ def get_feature_for_kmeans(feature_map, segments):
    return x


-def precompute_normlime_weights(list_data_, predict_fn, num_samples=3000, batch_size=50, save_dir='./tmp'):
+def precompute_normlime_weights(list_data_,
+                                predict_fn,
+                                num_samples=3000,
+                                batch_size=50,
+                                save_dir='./tmp'):
    # save lime weights and kmeans cluster labels
-    precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir)
+    precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
+                            save_dir)

    # load precomputed results, compute normlime weights and save.
-    fname_list = glob.glob(os.path.join(save_dir, f'lime_weights_s{num_samples}*.npy'))
+    fname_list = glob.glob(
+        os.path.join(save_dir, 'lime_weights_s{}*.npy'.format(num_samples)))
    return compute_normlime_weights(fname_list, save_dir, num_samples)


-def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels, cluster_labels, save_path):
+def save_one_lime_predict_and_kmean_labels(lime_all_weights, image_pred_labels,
+                                           cluster_labels, save_path):

    lime_weights = {}
    for label in image_pred_labels:
        lime_weights[label] = lime_all_weights[label]

    for_normlime_weights = {
-        'lime_weights': lime_weights,  # a dict: class_label: (seg_label, weight)
+        'lime_weights':
+        lime_weights,  # a dict: class_label: (seg_label, weight)
        'cluster': cluster_labels  # a list with segments as indices.
    }

    np.save(save_path, for_normlime_weights)


-def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, save_dir):
+def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
+                            save_dir):
    root_path = gen_user_home()
    root_path = osp.join(root_path, '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
@@ -117,17 +134,24 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav

    for data_index, each_data_ in enumerate(list_data_):
        if isinstance(each_data_, str):
-            save_path = f"lime_weights_s{num_samples}_{each_data_.split('/')[-1].split('.')[0]}.npy"
+            save_path = "lime_weights_s{}_{}.npy".format(
+                num_samples, each_data_.split('/')[-1].split('.')[0])
            save_path = os.path.join(save_dir, save_path)
        else:
-            save_path = f"lime_weights_s{num_samples}_{data_index}.npy"
+            save_path = "lime_weights_s{}_{}.npy".format(num_samples,
+                                                         data_index)
            save_path = os.path.join(save_dir, save_path)

        if os.path.exists(save_path):
-            logging.info(save_path + ' exists, not computing this one.', use_color=True)
+            logging.info(
+                save_path + ' exists, not computing this one.', use_color=True)
            continue
-        img_file_name = each_data_ if isinstance(each_data_, str) else data_index
-        logging.info('processing ' + img_file_name + ' [{}/{}]'.format(data_index, len(list_data_)), use_color=True)
+        img_file_name = each_data_ if isinstance(each_data_,
+                                                 str) else data_index
+        logging.info(
+            'processing ' + img_file_name + ' [{}/{}]'.format(data_index,
+                                                              len(list_data_)),
+            use_color=True)

        image_show = read_image(each_data_)
        result = predict_fn(image_show)
@@ -156,34 +180,40 @@ def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size, sav
        pred_label = pred_label[:top_k]

        algo = lime_base.LimeImageInterpreter()
-        interpreter = algo.interpret_instance(image_show[0], predict_fn, pred_label, 0,
-                                              num_samples=num_samples, batch_size=batch_size)
-
-        X = get_feature_for_kmeans(compute_features_for_kmeans(image_show).transpose((1, 2, 0)), interpreter.segments)
+        interpreter = algo.interpret_instance(
+            image_show[0],
+            predict_fn,
+            pred_label,
+            0,
+            num_samples=num_samples,
+            batch_size=batch_size)
+
+        X = get_feature_for_kmeans(
+            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
+            interpreter.segments)
        try:
            cluster_labels = kmeans_model.predict(X)
        except AttributeError:
            from sklearn.metrics import pairwise_distances_argmin_min
-            cluster_labels, _ = pairwise_distances_argmin_min(X, kmeans_model.cluster_centers_)
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, kmeans_model.cluster_centers_)
        save_one_lime_predict_and_kmean_labels(
-            interpreter.local_weights, pred_label,
-            cluster_labels,
-            save_path
-        )
+            interpreter.local_weights, pred_label, cluster_labels, save_path)


def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
    normlime_weights_all_labels = {}
+
    for f in a_list_lime_fnames:
        try:
            lime_weights_and_cluster = np.load(f, allow_pickle=True).item()
            lime_weights = lime_weights_and_cluster['lime_weights']
            cluster = lime_weights_and_cluster['cluster']
        except:
-            print('When loading precomputed LIME result, skipping', f)
+            logging.info('When loading precomputed LIME result, skipping ' +
+                         str(f))
            continue
-        print('Loading precomputed LIME result,', f)
-
+        logging.info('Loading precomputed LIME result, ' + str(f))
        pred_labels = lime_weights.keys()
        for y in pred_labels:
            normlime_weights = normlime_weights_all_labels.get(y, {})
@@ -203,32 +233,183 @@ def compute_normlime_weights(a_list_lime_fnames, save_dir, lime_num_samples):
    for y in normlime_weights_all_labels:
        normlime_weights = normlime_weights_all_labels.get(y, {})
        for k in normlime_weights:
-            normlime_weights[k] = sum(normlime_weights[k]) / len(normlime_weights[k])
+            normlime_weights[k] = sum(normlime_weights[k]) / len(
+                normlime_weights[k])

    # check normlime
-    if len(normlime_weights_all_labels.keys()) < max(normlime_weights_all_labels.keys()) + 1:
-        print(
-            "\n"
-            "Warning: !!! \n"
-            f"There are at least {max(normlime_weights_all_labels.keys()) + 1} classes, "
-            f"but the NormLIME has results of only {len(normlime_weights_all_labels.keys())} classes. \n"
-            "It may have cause unstable results in the later computation"
-            " but can be improved by computing more test samples."
+    if len(normlime_weights_all_labels.keys()) < max(
+            normlime_weights_all_labels.keys()) + 1:
+        logging.info(
+            "\n" + \
+            "Warning: !!! \n" + \
+            "There are at least {} classes, ".format(max(normlime_weights_all_labels.keys()) + 1) + \
+            "but the NormLIME has results of only {} classes. \n".format(len(normlime_weights_all_labels.keys())) + \
+            "It may cause unstable results in the later computation" + \
+            " but can be improved by computing more test samples." + \
            "\n"
        )

    n = 0
-    f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
-    while os.path.exists(
-        os.path.join(save_dir, f_out)
-    ):
+    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+        lime_num_samples, len(a_list_lime_fnames), n)
+    while os.path.exists(os.path.join(save_dir, f_out)):
        n += 1
-        f_out = f'normlime_weights_s{lime_num_samples}_samples_{len(a_list_lime_fnames)}-{n}.npy'
+        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
+            lime_num_samples, len(a_list_lime_fnames), n)
        continue

-    np.save(
-        os.path.join(save_dir, f_out),
-        normlime_weights_all_labels
-    )
+    np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels)
    return os.path.join(save_dir, f_out)


+def precompute_global_classifier(dataset,
+                                 predict_fn,
+                                 save_path,
+                                 batch_size=50,
+                                 max_num_samples=1000):
+    from sklearn.linear_model import LogisticRegression
+
+    root_path = gen_user_home()
+    root_path = osp.join(root_path, '.paddlex')
+    h_pre_models = osp.join(root_path, "pre_models")
+    if not osp.exists(h_pre_models):
+        if not osp.exists(root_path):
+            os.makedirs(root_path)
+        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
+        import paddlex as pdx  # local import; pdx is not imported at module level
+        pdx.utils.download_and_decompress(url, path=root_path)
+    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
+    kmeans_model = load_kmeans_model(h_pre_models_kmeans)
+
+    image_list = []
+    for item in dataset.file_list:
+        image_list.append(item[0])
+
+    x_data = []
+    y_labels = []
+
+    num_features = len(kmeans_model.cluster_centers_)
+
+    logging.info(
+        "Initialization for NormLIME: Computing each sample in the test list.",
+        use_color=True)
+
+    for each_data_ in tqdm.tqdm(image_list):
+        x_data_i = np.zeros((num_features))
+        image_show = read_image(each_data_)
+        result = predict_fn(image_show)
+        result = result[0]  # only one image here.
+        c = compute_features_for_kmeans(image_show).transpose((1, 2, 0))
+
+        segments = np.zeros((image_show.shape[1], image_show.shape[2]),
+                            np.int32)
+        num_blocks = 10
+        height_per_i = segments.shape[0] // num_blocks + 1
+        width_per_i = segments.shape[1] // num_blocks + 1
+
+        for i in range(segments.shape[0]):
+            for j in range(segments.shape[1]):
+                segments[i,
+                         j] = i // height_per_i * num_blocks + j // width_per_i
+
+        # segments = quickshift(image_show[0], sigma=1)
+        X = get_feature_for_kmeans(c, segments)
+
+        try:
+            cluster_labels = kmeans_model.predict(X)
+        except AttributeError:
+            from sklearn.metrics import pairwise_distances_argmin_min
+            cluster_labels, _ = pairwise_distances_argmin_min(
+                X, kmeans_model.cluster_centers_)
+
+        for c in cluster_labels:
+            x_data_i[c] = 1
+
+        # x_data_i /= len(cluster_labels)
+
+        pred_y_i = np.argmax(result)
+        y_labels.append(pred_y_i)
+        x_data.append(x_data_i)
+
+    if len(np.unique(y_labels)) < 2:
+        logging.info("Warning: The test samples in the dataset are limited.\n \
+                     NormLIME may have no effect on the results.\n \
+                     Try to add more test samples, or see the results of LIME.")
+        num_classes = np.max(np.unique(y_labels)) + 1
+        normlime_weights_all_labels = {}
+        for class_index in range(num_classes):
+            w = np.ones((num_features)) / num_features
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+        logging.info("Saving the computed normlime_weights in {}".format(
+            save_path))
+
+        np.save(save_path, normlime_weights_all_labels)
+        return save_path
+
+    clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
+    clf.fit(x_data, y_labels)
+
+    num_classes = np.max(np.unique(y_labels)) + 1
+    normlime_weights_all_labels = {}
+
+    if len(y_labels) / len(np.unique(y_labels)) < 3:
+        logging.info("Warning: The test samples in the dataset are limited.\n \
+                     NormLIME may have no effect on the results.\n \
+                     Try to add more test samples, or see the results of LIME.")
+
+    if len(np.unique(y_labels)) == 2:
+        # binary: clf.coef_ has shape of [1, num_features]
+        for class_index in range(num_classes):
+            if class_index not in clf.classes_:
+                w = np.ones((num_features)) / num_features
+                normlime_weights_all_labels[class_index] = {
+                    i: wi
+                    for i, wi in enumerate(w)
+                }
+                continue
+
+            if clf.classes_[0] == class_index:
+                w = -clf.coef_[0]
+            else:
+                w = clf.coef_[0]
+
+            # softmax
+            w = w - np.max(w)
+            exp_w = np.exp(w * 10)
+            w = exp_w / np.sum(exp_w)
+
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+    else:
+        # clf.coef_ has shape of [len(np.unique(y_labels)), num_features]
+        for class_index in range(num_classes):
+            if class_index not in clf.classes_:
+                w = np.ones((num_features)) / num_features
+                normlime_weights_all_labels[class_index] = {
+                    i: wi
+                    for i, wi in enumerate(w)
+                }
+                continue
+
+            coef_class_index = np.where(clf.classes_ == class_index)[0][0]
+            w = clf.coef_[coef_class_index]
+
+            # softmax
+            w = w - np.max(w)
+            exp_w = np.exp(w * 10)
+            w = exp_w / np.sum(exp_w)
+
+            normlime_weights_all_labels[class_index] = {
+                i: wi
+                for i, wi in enumerate(w)
+            }
+
+    logging.info("Saving the computed normlime_weights in {}".format(
+        save_path))
+    np.save(save_path, normlime_weights_all_labels)
+
+    return save_path
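precompute_global_classifier above turns each class's logistic-regression coefficients into NormLIME weights through a temperature-scaled softmax (factor 10, with the usual max-subtraction for numerical stability), so every class's per-cluster weights are positive and sum to one. Restated in isolation, with an illustrative function name:

import numpy as np

def coef_to_normlime_weights_sketch(coef, temperature=10.0):
    # coef: the coefficient vector of one class from LogisticRegression.
    w = coef - np.max(coef)          # stabilize the exponent
    exp_w = np.exp(w * temperature)  # sharpen the distribution
    w = exp_w / np.sum(exp_w)        # positive weights summing to 1
    return {i: wi for i, wi in enumerate(w)}

For a binary classifier sklearn stores a single coefficient row, which is why the diff negates clf.coef_[0] for the class that sits at clf.classes_[0].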
diff --git a/paddlex/interpret/interpretation_predict.py b/paddlex/interpret/interpretation_predict.py
index 198f949ac7f13117fb51b7240d532eabf1c669eb..31b3b47e86613f62ba1c63b4ba2041357cc6bdc7 100644
--- a/paddlex/interpret/interpretation_predict.py
+++ b/paddlex/interpret/interpretation_predict.py
@@ -13,17 +13,26 @@
# limitations under the License.

import numpy as np
+import cv2
+import copy
+

def interpretation_predict(model, images):
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    images = images.astype('float32')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
+
+    tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
+    model.test_transforms.transforms = model.test_transforms.transforms[-2:]
+
    new_imgs = []
    for i in range(images.shape[0]):
-        img = images[i]
-        new_imgs.append(model.test_transforms(img)[0])
+        images[i] = cv2.cvtColor(images[i], cv2.COLOR_RGB2BGR)
+        new_imgs.append(model.test_transforms(images[i])[0])
+
    new_imgs = np.array(new_imgs)
-    result = model.exe.run(
-        model.test_prog,
-        feed={'image': new_imgs},
-        fetch_list=list(model.interpretation_feats.values()))
-    return result
\ No newline at end of file
+    out = model.exe.run(model.test_prog,
+                        feed={'image': new_imgs},
+                        fetch_list=list(model.interpretation_feats.values()))
+
+    model.test_transforms.transforms = tmp_transforms
+
+    return out
diff --git a/paddlex/interpret/visualize.py b/paddlex/interpret/visualize.py
index de8e9151b9417fd3307c74d7bb67767bed1845c7..c1b013d04b9b21a49ecf7eeb6dd65b6d6c578069 100644
--- a/paddlex/interpret/visualize.py
+++ b/paddlex/interpret/visualize.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

import os
import cv2
@@ -20,79 +20,79 @@ import numpy as np
import paddlex as pdx
from .interpretation_predict import interpretation_predict
from .core.interpretation import Interpretation
-from .core.normlime_base import precompute_normlime_weights
+from .core.normlime_base import precompute_global_classifier
from .core._session_preparation import gen_user_home
-
-def lime(img_file,
-         model,
-         num_samples=3000,
-         batch_size=50,
-         save_dir='./'):
-    """使用LIME算法将模型预测结果的可解释性可视化。
-
+
+
+def lime(img_file, model, num_samples=3000, batch_size=50, save_dir='./'):
+    """使用LIME算法将模型预测结果的可解释性可视化。
+
    LIME表示与模型无关的局部可解释性,可以解释任何模型。LIME的思想是以输入样本为中心,
    在其附近的空间中进行随机采样,每个采样通过原模型得到新的输出,这样得到一系列的输入
    和对应的输出,LIME用一个简单的、可解释的模型(比如线性回归模型)来拟合这个映射关系,
-    得到每个输入维度的权重,以此来解释模型。
-
+    得到每个输入维度的权重,以此来解释模型。
+
    注意:LIME可解释性结果可视化目前只支持分类模型。
-
+
    Args:
        img_file (str): 预测图像路径。
        model (paddlex.cv.models): paddlex中的模型。
        num_samples (int): LIME用于学习线性模型的采样数,默认为3000。
        batch_size (int): 预测数据batch大小,默认为50。
-        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
+        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
    """
    assert model.model_type == 'classifier', \
        'Now the interpretation visualize only be supported in classifier!'
    if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'The interpretation can only deal with the Normal model')
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
    tmp_transforms = copy.deepcopy(model.test_transforms)
    tmp_transforms.transforms = tmp_transforms.transforms[:-2]
    img = tmp_transforms(img_file)[0]
    img = np.around(img).astype('uint8')
    img = np.expand_dims(img, axis=0)
    interpreter = None
-    interpreter = get_lime_interpreter(img, model, num_samples=num_samples, batch_size=batch_size)
+    interpreter = get_lime_interpreter(
+        img, model, num_samples=num_samples, batch_size=batch_size)
    img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
-
-
-def normlime(img_file,
-             model,
-             dataset=None,
-             num_samples=3000,
-             batch_size=50,
-             save_dir='./'):
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
+
+
+def normlime(img_file,
+             model,
+             dataset=None,
+             num_samples=3000,
+             batch_size=50,
+             save_dir='./',
+             normlime_weights_file=None):
    """使用NormLIME算法将模型预测结果的可解释性可视化。
-
+
    NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测
    试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系。
-
+
    注意1:dataset读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
    注意2:NormLIME可解释性结果可视化目前只支持分类模型。
-
+
    Args:
        img_file (str): 预测图像路径。
        model (paddlex.cv.models): paddlex中的模型。
        dataset (paddlex.datasets): 数据集读取器,默认为None。
        num_samples (int): LIME用于学习线性模型的采样数,默认为3000。
        batch_size (int): 预测数据batch大小,默认为50。
-        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
+        save_dir (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
+        normlime_weights_file (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
    """
    assert model.model_type == 'classifier', \
        'Now the interpretation visualize only be supported in classifier!'
    if model.status != 'Normal':
-        raise Exception('The interpretation only can deal with the Normal model')
+        raise Exception(
+            'The interpretation can only deal with the Normal model')
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
-    model.arrange_transforms(
-        transforms=model.test_transforms, mode='test')
+    model.arrange_transforms(transforms=model.test_transforms, mode='test')
    tmp_transforms = copy.deepcopy(model.test_transforms)
    tmp_transforms.transforms = tmp_transforms.transforms[:-2]
    img = tmp_transforms(img_file)[0]
@@ -100,52 +100,48 @@ def normlime(img_file,
    img = np.expand_dims(img, axis=0)
    interpreter = None
    if dataset is None:
-        raise Exception('The dataset is None. Cannot implement this kind of interpretation')
-    interpreter = get_normlime_interpreter(img, model, dataset,
-                                           num_samples=num_samples, batch_size=batch_size,
-                                           save_dir=save_dir)
+        raise Exception(
+            'The dataset is None. Cannot implement this kind of interpretation')
+    interpreter = get_normlime_interpreter(
+        img,
+        model,
+        dataset,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        save_dir=save_dir,
+        normlime_weights_file=normlime_weights_file)
    img_name = osp.splitext(osp.split(img_file)[-1])[0]
-    interpreter.interpret(img, save_dir=save_dir)
-
-
+    interpreter.interpret(img, save_dir=osp.join(save_dir, img_name))
+
+
def get_lime_interpreter(img, model, num_samples=3000, batch_size=50):
    def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
        out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
        return out[0]
+
    labels_name = None
    if hasattr(model, 'labels'):
        labels_name = model.labels
-    interpreter = Interpretation('lime',
-                                 predict_func,
-                                 labels_name,
-                                 num_samples=num_samples,
-                                 batch_size=batch_size)
+    interpreter = Interpretation(
+        'lime',
+        predict_func,
+        labels_name,
+        num_samples=num_samples,
+        batch_size=batch_size)
    return interpreter


-def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    def precompute_predict_func(image):
-        image = image.astype('float32')
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
-        out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
-        return out[0]
+def get_normlime_interpreter(img,
+                             model,
+                             dataset,
+                             num_samples=3000,
+                             batch_size=50,
+                             save_dir='./',
+                             normlime_weights_file=None):
    def predict_func(image):
-        image = image.astype('float32')
-        for i in range(image.shape[0]):
-            image[i] = cv2.cvtColor(image[i], cv2.COLOR_RGB2BGR)
-        tmp_transforms = copy.deepcopy(model.test_transforms.transforms)
-        model.test_transforms.transforms = model.test_transforms.transforms[-2:]
        out = interpretation_predict(model, image)
-        model.test_transforms.transforms = tmp_transforms
        return out[0]
+
    labels_name = None
    if dataset is not None:
        labels_name = dataset.labels
@@ -157,28 +153,29 @@ def get_normlime_interpreter(img, model, dataset, num_samples=3000, batch_size=5
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
-    npy_dir = precompute_for_normlime(precompute_predict_func,
-                                      dataset,
-                                      num_samples=num_samples,
-                                      batch_size=batch_size,
-                                      save_dir=save_dir)
-    interpreter = Interpretation('normlime',
-                                 predict_func,
-                                 labels_name,
-                                 num_samples=num_samples,
-                                 batch_size=batch_size,
-                                 normlime_weights=npy_dir)
-    return interpreter
-
-def precompute_for_normlime(predict_func, dataset, num_samples=3000, batch_size=50, save_dir='./'):
-    image_list = []
-    for item in dataset.file_list:
-        image_list.append(item[0])
-    return precompute_normlime_weights(
-        image_list,
+    if osp.exists(osp.join(save_dir, normlime_weights_file)):
+        normlime_weights_file = osp.join(save_dir, normlime_weights_file)
+        try:
+            np.load(normlime_weights_file, allow_pickle=True).item()
+        except:
+            normlime_weights_file = precompute_global_classifier(
+                dataset,
+                predict_func,
+                save_path=normlime_weights_file,
+                batch_size=batch_size)
+    else:
+        normlime_weights_file = precompute_global_classifier(
+            dataset,
            predict_func,
-            num_samples=num_samples,
-            batch_size=batch_size,
-            save_dir=save_dir)
-
+            save_path=osp.join(save_dir, normlime_weights_file),
+            batch_size=batch_size)
+
+    interpreter = Interpretation(
+        'normlime',
+        predict_func,
+        labels_name,
+        num_samples=num_samples,
+        batch_size=batch_size,
+        normlime_weights=normlime_weights_file)
+    return interpreter
diff --git a/paddlex/seg.py b/paddlex/seg.py
index 0f92813d45b4e7f5e08ee64fbd6cfa675087ba4a..fdfdffd4639c6b3ddb75ac20ca0b3ecf4edd2328 100644
--- a/paddlex/seg.py
+++ b/paddlex/seg.py
@@ -17,5 +17,7 @@ from . import cv

UNet = cv.models.UNet
DeepLabv3p = cv.models.DeepLabv3p
+HRNet = cv.models.HRNet
+FastSCNN = cv.models.FastSCNN
transforms = cv.transforms.seg_transforms
visualize = cv.models.utils.visualize.visualize_segmentation
diff --git a/paddlex/slim.py b/paddlex/slim.py
index 57fc104d75307ac13ead57d12717490eb8154acf..407119dc624b9d74807cb9215e00eb3144b7093f 100644
--- a/paddlex/slim.py
+++ b/paddlex/slim.py
@@ -31,4 +31,4 @@ def export_quant_model(model,
        batch_size=batch_size,
        batch_num=batch_num,
        save_dir=save_dir,
-        cache_dir='./temp')
+        cache_dir=cache_dir)
diff --git a/paddlex/tools/x2coco.py b/paddlex/tools/x2coco.py
index 595a6e6d328d08ff9aa8000c5afb465786c31431..4c893dcc9319ffc4353d4e376a802301d047120a 100644
--- a/paddlex/tools/x2coco.py
+++ b/paddlex/tools/x2coco.py
@@ -110,7 +110,7 @@ class LabelMe2COCO(X2COCO):
        annotation["segmentation"] = [list(np.asarray(points).flatten())]
        annotation["iscrowd"] = 0
        annotation["image_id"] = image_id + 1
-        annotation["bbox"] = list(map(float, get_bbox(height, width, points)))
+        annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
        annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
        annotation["category_id"] = label_to_num[label]
        annotation["id"] = object_id + 1
@@ -254,4 +254,4 @@ class EasyData2COCO(X2COCO):
                segmentation.append(contour_list)
                self.annotations_list.append(
                    self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id,
-                                                     label_to_num))
\ No newline at end of file
+                                                     label_to_num))
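get_normlime_interpreter in the visualize.py hunk above follows a compute-once caching pattern for the NormLIME weights file: reuse the .npy if it already exists and loads cleanly, otherwise (re)compute it with precompute_global_classifier and save it at the same path. The pattern in isolation; compute_fn is a hypothetical stand-in for the real precompute call:

import os.path as osp
import numpy as np

def load_or_precompute_sketch(weights_path, compute_fn):
    # Reuse the cached weights file when it loads as a dict; otherwise
    # recompute, which also overwrites a corrupt or stale file.
    if osp.exists(weights_path):
        try:
            np.load(weights_path, allow_pickle=True).item()
            return weights_path
        except Exception:
            pass  # fall through and recompute
    return compute_fn(save_path=weights_path)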
diff --git a/paddlex/utils/__init__.py b/paddlex/utils/__init__.py
index ff774c985feb6ffc24a3e8c67237cdff0a074ee4..19c86d754b9b99219fdbf7be4b5e7fa6cffe6346 100644
--- a/paddlex/utils/__init__.py
+++ b/paddlex/utils/__init__.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

from __future__ import absolute_import
from . import logging
diff --git a/paddlex/utils/logging.py b/paddlex/utils/logging.py
index e5deb7388459f1052fd90d758d09aad759592ec8..adfcea515273286f37921ec13999fb2234ce404f 100644
--- a/paddlex/utils/logging.py
+++ b/paddlex/utils/logging.py
@@ -29,13 +29,11 @@ def log(level=2, message="", use_color=False):
    current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array)
    if paddlex.log_level >= level:
        if use_color:
-            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(
-                current_time, levels[level],
-                message).encode("utf-8").decode("latin1"))
+            print("\033[1;31;40m{} [{}]\t{}\033[0m".format(current_time, levels[
+                level], message).encode("utf-8").decode("latin1"))
        else:
-            print(
-                "{} [{}]\t{}".format(current_time, levels[level],
-                                     message).encode("utf-8").decode("latin1"))
+            print("{} [{}]\t{}".format(current_time, levels[level], message)
+                  .encode("utf-8").decode("latin1"))
        sys.stdout.flush()

@@ -47,9 +45,11 @@ def info(message="", use_color=False):
    log(level=2, message=message, use_color=use_color)


-def warning(message="", use_color=False):
+def warning(message="", use_color=True):
    log(level=1, message=message, use_color=use_color)


-def error(message="", use_color=False):
+def error(message="", use_color=True, exit=True):
    log(level=0, message=message, use_color=use_color)
+    if exit:
+        sys.exit(-1)
diff --git a/paddlex/utils/utils.py b/paddlex/utils/utils.py
index fe14022152db3a8f9853c93afd20745adb30c1f0..d9005875ea6c793269a8c67e065b69bd7100dbe8 100644
--- a/paddlex/utils/utils.py
+++ b/paddlex/utils/utils.py
@@ -67,8 +67,8 @@ def parse_param_file(param_file, return_shape=True):
            f.close()
            return tuple(tensor_desc.dims)
    if tensor_desc.data_type != 5:
-        raise Exception(
-            "Unexpected data type while parse {}".format(param_file))
+        raise Exception("Unexpected data type while parsing {}".format(
+            param_file))
    data_size = 4
    for i in range(len(tensor_shape)):
        data_size *= tensor_shape[i]
@@ -139,7 +139,12 @@ def load_pdparams(exe, main_prog, model_dir):
    vars_to_load = list()
    import pickle
-    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
+
+    if osp.isfile(model_dir):
+        params_file = model_dir
+    else:
+        params_file = osp.join(model_dir, 'model.pdparams')
+    with open(params_file, 'rb') as f:
        params_dict = pickle.load(f) if six.PY2 else pickle.load(
            f, encoding='latin1')
    unused_vars = list()
@@ -185,8 +190,8 @@ def is_belong_to_optimizer(var):
    import paddle.fluid as fluid
    from paddle.fluid.proto.framework_pb2 import VarType

-    if not (isinstance(var, fluid.framework.Parameter)
-            or var.desc.need_check_feed()):
+    if not (isinstance(var, fluid.framework.Parameter) or
+            var.desc.need_check_feed()):
        return is_persistable(var)
    return False

@@ -206,9 +211,8 @@ def load_pdopt(exe, main_prog, model_dir):
    if len(optimizer_var_list) > 0:
        for var in optimizer_var_list:
            if var.name not in opt_dict:
-                raise Exception(
-                    "{} is not in saved paddlex optimizer, {}".format(
-                        var.name, exception_message))
+                raise Exception("{} is not in saved paddlex optimizer, {}".
+                                format(var.name, exception_message))
            if var.shape != opt_dict[var.name].shape:
                raise Exception(
                    "Shape of optimizer variable {} doesn't match.(Last: {}, Now: {}), {}"
@@ -227,9 +231,8 @@ def load_pdopt(exe, main_prog, model_dir):
            "There is no optimizer parameters in the model, please set the optimizer!"
        )
    else:
-        logging.info(
-            "There are {} optimizer parameters in {} are loaded.".format(
-                len(optimizer_var_list), model_dir))
+        logging.info("{} optimizer parameters in {} are loaded.".format(
+            len(optimizer_var_list), model_dir))


def load_pretrain_weights(exe,
@@ -239,6 +242,12 @@ def load_pretrain_weights(exe,
                          resume=False):
    if not osp.exists(weights_dir):
        raise Exception("Path {} not exists.".format(weights_dir))
+    if osp.isfile(weights_dir):
+        if not weights_dir.endswith('.pdparams'):
+            raise Exception("File {} is not a paddle parameter file".format(
+                weights_dir))
+        load_pdparams(exe, main_prog, weights_dir)
+        return
    if osp.exists(osp.join(weights_dir, "model.pdparams")):
        load_pdparams(exe, main_prog, weights_dir)
        if resume:
@@ -255,9 +264,8 @@ def load_pretrain_weights(exe,
        if not isinstance(var, fluid.framework.Parameter):
            continue
        if not osp.exists(osp.join(weights_dir, var.name)):
-            logging.debug(
-                "[SKIP] Pretrained weight {}/{} doesn't exist".format(
-                    weights_dir, var.name))
+            logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".
+                          format(weights_dir, var.name))
            continue
        pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
        actual_shape = tuple(var.shape)
@@ -269,11 +277,9 @@ def load_pretrain_weights(exe,
            vars_to_load.append(var)
            logging.debug("Weight {} will be load".format(var.name))

-    fluid.io.load_vars(
-        executor=exe,
-        dirname=weights_dir,
-        main_program=main_prog,
-        vars=vars_to_load)
+    params_dict = fluid.io.load_program_state(
+        weights_dir, var_list=vars_to_load)
+    fluid.io.set_program_state(main_prog, params_dict)
    if len(vars_to_load) == 0:
        logging.warning(
            "There is no pretrain weights loaded, maybe you should check you pretrain model!"
@@ -319,9 +325,8 @@ def load_pretrain_weights(exe,
            "There is no optimizer parameters in the model, please set the optimizer!"
        )
    else:
-        logging.info(
-            "There are {} optimizer parameters in {} are loaded.".format(
-                len(optimizer_var_list), weights_dir))
+        logging.info("{} optimizer parameters in {} are loaded.".format(
+            len(optimizer_var_list), weights_dir))


class EarlyStop:
@@ -344,12 +349,12 @@ class EarlyStop:
            self.max = current_score
            return False
        else:
-            if (abs(self.score - current_score) < self.thresh
-                    or current_score < self.score):
+            if (abs(self.score - current_score) < self.thresh or
+                    current_score < self.score):
                self.counter += 1
                self.score = current_score
-                logging.debug(
-                    "EarlyStopping: %i / %i" % (self.counter, self.patience))
+                logging.debug("EarlyStopping: %i / %i" %
+                              (self.counter, self.patience))
                if self.counter >= self.patience:
                    logging.info("EarlyStopping: Stop training")
                    return True
diff --git a/setup.py b/setup.py
index a044495c902f6b754a69265c5020d7dbda992b14..44aca0f9dc2a214ff4bcf4e2817d06423c26812b 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit

setuptools.setup(
    name="paddlex",
-    version='1.0.2',
+    version='1.0.6',
    author="paddlex",
    author_email="paddlex@baidu.com",
    description=long_description,
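The EarlyStop change above is only a reflow, but the class's contract is worth restating: the counter grows whenever the score fails to beat the previous one by at least thresh (or drops outright), and training stops once the counter reaches patience. A condensed sketch; the counter-reset branch is an assumption, since it sits outside the quoted hunk:

class EarlyStopSketch(object):
    # Condensed restatement of paddlex.utils.utils.EarlyStop, not the class itself.
    def __init__(self, patience=5, thresh=1e-4):
        self.patience = patience
        self.thresh = thresh
        self.counter = 0
        self.score = None

    def step(self, current_score):
        if self.score is None:
            self.score = current_score
            return False
        if (abs(self.score - current_score) < self.thresh or
                current_score < self.score):
            self.counter += 1   # no meaningful improvement this epoch
        else:
            self.counter = 0    # assumed reset on a real improvement
        self.score = current_score
        return self.counter >= self.patience  # True -> stop training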
diff --git a/tools/codestyle/clang_format.hook b/tools/codestyle/clang_format.hook
new file mode 100755
index 0000000000000000000000000000000000000000..1d928216867c0ba3897d71542fea44debf8d72a0
--- /dev/null
+++ b/tools/codestyle/clang_format.hook
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+readonly VERSION="3.8"
+
+version=$(clang-format -version)
+
+if ! [[ $version == *"$VERSION"* ]]; then
+    echo "clang-format version check failed."
+    echo "a version containing '$VERSION' is needed, but got '$version'"
+    echo "you can install the right version and make a soft link on your '\$PATH'"
+    exit -1
+fi
+
+clang-format $@
diff --git a/tools/codestyle/cpplint_pre_commit.hook b/tools/codestyle/cpplint_pre_commit.hook
new file mode 100755
index 0000000000000000000000000000000000000000..630aeb8caaf88139fe8efae5c1f7e27f258d25c1
--- /dev/null
+++ b/tools/codestyle/cpplint_pre_commit.hook
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+TOTAL_ERRORS=0
+if [[ ! $TRAVIS_BRANCH ]]; then
+    # install cpplint on local machine.
+    if [[ ! $(which cpplint) ]]; then
+        pip install cpplint
+    fi
+    # diff files on local machine.
+    files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
+else
+    # diff files between PR and latest commit on Travis CI.
+    branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
+    head_ref=$(git rev-parse HEAD)
+    files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
+fi
+# The trick to remove deleted files: https://stackoverflow.com/a/2413151
+for file in $files; do
+    if [[ $file =~ ^(patches/.*) ]]; then
+        continue;
+    else
+        cpplint --filter=-readability/fn_size $file;
+        TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
+    fi
+done
+
+exit $TOTAL_ERRORS
diff --git a/tutorials/interpret/normlime.py b/tutorials/interpret/normlime.py
index 3e501388e44aeab8548ae123831bc3211b08cea7..f3a1129780ab87d6d242010a124760c9a64608bd 100644
--- a/tutorials/interpret/normlime.py
+++ b/tutorials/interpret/normlime.py
@@ -14,18 +14,22 @@ model_file = 'https://bj.bcebos.com/paddlex/interpret/mini_imagenet_veg_mobilene
pdx.utils.download_and_decompress(model_file, path='./')

# 加载模型
-model = pdx.load_model('mini_imagenet_veg_mobilenetv2')
+model_file = 'mini_imagenet_veg_mobilenetv2'
+model = pdx.load_model(model_file)

# 定义测试所用的数据集
+dataset = 'mini_imagenet_veg'
test_dataset = pdx.datasets.ImageNet(
-    data_dir='mini_imagenet_veg',
-    file_list=osp.join('mini_imagenet_veg', 'test_list.txt'),
-    label_list=osp.join('mini_imagenet_veg', 'labels.txt'),
+    data_dir=dataset,
+    file_list=osp.join(dataset, 'test_list.txt'),
+    label_list=osp.join(dataset, 'labels.txt'),
    transforms=model.test_transforms)

# 可解释性可视化
pdx.interpret.normlime(
-    'mini_imagenet_veg/mushroom/n07734744_1106.JPEG',
-    model,
-    test_dataset,
-    save_dir='./')
+    test_dataset.file_list[0][0],
+    model,
+    test_dataset,
+    save_dir='./',
+    normlime_weights_file='{}_{}.npy'.format(
+        dataset.split('/')[-1], model.model_name))
diff --git a/tutorials/train/segmentation/hrnet.py b/tutorials/train/segmentation/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f887b78c3ae16ae66235f1965ada8bd2355d62c6
--- /dev/null
+++ b/tutorials/train/segmentation/hrnet.py
@@ -0,0 +1,50 @@
+import os
+# Use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+# Download and decompress the optic disc segmentation dataset
+optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
+pdx.utils.download_and_decompress(optic_dataset, path='./')
+
+# Define the transforms for training and evaluation
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
+    transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+    transforms.ResizeByLong(long_size=512),
+    transforms.Padding(target_size=512), transforms.Normalize()
+])
+
+# Define the datasets used for training and evaluation
+train_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/train_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.SegDataset(
+    data_dir='optic_disc_seg',
+    file_list='optic_disc_seg/val_list.txt',
+    label_list='optic_disc_seg/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be inspected with VisualDL
+# Launch VisualDL with: visualdl --logdir output/hrnet/vdl_log --port 8001
+# then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 works for local access; for a remote server, use that machine's IP
+num_classes = len(train_dataset.labels)
+model = pdx.seg.HRNet(num_classes=num_classes)
+model.train(
+    num_epochs=20,
+    train_dataset=train_dataset,
+    train_batch_size=4,
+    eval_dataset=eval_dataset,
+    learning_rate=0.01,
+    save_dir='output/hrnet',
+    use_vdl=True)
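As a follow-up to the new hrnet.py tutorial, a prediction pass with the trained model might look like the sketch below. This is not part of the diff: the checkpoint path follows from save_dir above, the sample image path is hypothetical, and pdx.seg.visualize is the alias that the paddlex/seg.py change earlier in this patch exports for visualize_segmentation.

import paddlex as pdx

# Load the best checkpoint written by model.train() (save_dir='output/hrnet').
model = pdx.load_model('output/hrnet/best_model')

# Predict one image; the path is a hypothetical sample from the dataset.
image = 'optic_disc_seg/JPEGImages/H0002.jpg'
result = model.predict(image)

# Overlay the predicted label map on the input image.
pdx.seg.visualize(image, result, save_dir='./output/hrnet')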