diff --git a/PaddleCV/PaddleDetection/.gitignore b/PaddleCV/PaddleDetection/.gitignore deleted file mode 100644 index 43369eea55459685633f4a95743b60834dad2d74..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/.gitignore +++ /dev/null @@ -1,64 +0,0 @@ -# Virtualenv -/.venv/ -/venv/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] - -# C extensions -*.so - -# json file -*.json - -# Distribution / packaging -/bin/ -/build/ -/develop-eggs/ -/dist/ -/eggs/ -/lib/ -/lib64/ -/output/ -/parts/ -/sdist/ -/var/ -/*.egg-info/ -/.installed.cfg -/*.egg -/.eggs - -# AUTHORS and ChangeLog will be generated while packaging -/AUTHORS -/ChangeLog - -# BCloud / BuildSubmitter -/build_submitter.* -/logger_client_log - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -.tox/ -.coverage -.cache -.pytest_cache -nosetests.xml -coverage.xml - -# Translations -*.mo - -# Sphinx documentation -/docs/_build/ - -*.json - - -dataset/coco/annotations -dataset/coco/train2017 -dataset/coco/val2017 -dataset/voc/VOCdevkit diff --git a/PaddleCV/PaddleDetection/.style.yapf b/PaddleCV/PaddleDetection/.style.yapf deleted file mode 100644 index 4741fb4f3bbc6681088cf9e960321e7b857a93a8..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/.style.yapf +++ /dev/null @@ -1,3 +0,0 @@ -[style] -based_on_style = pep8 -column_limit = 80 diff --git a/PaddleCV/PaddleDetection/README.md b/PaddleCV/PaddleDetection/README.md deleted file mode 100644 index 488400bb36731c0e4d887fc0943c1a4ebd1d1a3d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/README.md +++ /dev/null @@ -1,138 +0,0 @@ -# **This project has been deprecated, please visit https://github.com/PaddlePaddle/PaddleDetection**
- - - -English | [简体中文](README_cn.md) - -# PaddleDetection - -The goal of PaddleDetection is to provide easy access to a wide range of object -detection models in both industry and research settings. We design -PaddleDetection to be not only performant, production-ready but also highly -flexible, catering to research needs. - - -
- -
- - -## Introduction - -Features: - -- Production Ready: - - Key operations are implemented in C++ and CUDA, together with PaddlePaddle's -highly efficient inference engine, enables easy deployment in server environments. - -- Highly Flexible: - - Components are designed to be modular. Model architectures, as well as data -preprocess pipelines, can be easily customized with simple configuration -changes. - -- Performance Optimized: - - With the help of the underlying PaddlePaddle framework, faster training and -reduced GPU memory footprint is achieved. Notably, YOLOv3 training is -much faster compared to other frameworks. Another example is Mask-RCNN -(ResNet50), we managed to fit up to 4 images per GPU (Tesla V100 16GB) during -multi-GPU training. - -Supported Architectures: - -| | ResNet | ResNet-vd [1](#vd) | ResNeXt-vd | SENet | MobileNet | DarkNet | VGG | -| ------------------- | :----: | ----------------------------: | :--------: | :---: | :-------: | :-----: | :--: | -| Faster R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ | -| Faster R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | -| Mask R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ | -| Mask R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | -| Cascade Faster-RCNN | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ | -| Cascade Mask-RCNN | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | -| RetinaNet | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | ✗ | -| YOLOv3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ | -| SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ | - -[1] [ResNet-vd](https://arxiv.org/pdf/1812.01187) models offer much improved accuracy with negligible performance cost. - -Advanced Features: - -- [x] **Synchronized Batch Norm**: currently used by YOLOv3. -- [x] **Group Norm** -- [x] **Modulated Deformable Convolution** -- [x] **Deformable PSRoI Pooling** - -**NOTE:** Synchronized batch normalization can only be used on multiple GPU devices, can not be used on CPU devices or single GPU device. 
- -## Get Started - -- [Installation guide](docs/INSTALL.md) -- [Quick start on small dataset](docs/QUICK_STARTED.md) -- [Guide to traing, evaluate and arguments description](docs/GETTING_STARTED.md) -- [Guide to preprocess pipeline and custom dataset](docs/DATA.md) -- [Introduction to the configuration workflow](docs/CONFIG.md) -- [Examples for detailed configuration explanation](docs/config_example/) -- [IPython Notebook demo](demo/mask_rcnn_demo.ipynb) -- [Transfer learning document](docs/TRANSFER_LEARNING.md) - -## Model Zoo - -- Pretrained models are available in the [PaddleDetection model zoo](docs/MODEL_ZOO.md). -- [Face detection models](configs/face_detection/README.md) -- [Pretrained models for pedestrian and vehicle detection](contrib/README.md) - -## Model compression - -- [ Quantification aware training example](slim/quantization) -- [ Pruning compression example](slim/prune) - -## Depoly - -- [Export model for inference depolyment](docs/EXPORT_MODEL.md) -- [C++ inference depolyment](inference/README.md) - -## Benchmark - -- [Inference benchmark](docs/BENCHMARK_INFER_cn.md) - - -## Updates - -#### 10/2019 - -- Face detection models included: BlazeFace, Faceboxes. -- Enrich COCO models, box mAP up to 51.9%. -- Add CACacascade RCNN, one of the best single model of Objects365 2019 challenge Full Track champion. -- Add pretrained models for pedestrian and vehicle detection. -- Support mixed-precision training. -- Add C++ inference depolyment. -- Add model compression examples. - -#### 2/9/2019 - -- Add retrained models for GroupNorm. - -- Add Cascade-Mask-RCNN+FPN. - -#### 5/8/2019 - -- Add a series of models ralated modulated Deformable Convolution. 
- -#### 7/29/2019 - -- Update Chinese docs for PaddleDetection -- Fix bug in R-CNN models when train and test at the same time -- Add ResNext101-vd + Mask R-CNN + FPN models -- Add YOLOv3 on VOC models - -#### 7/3/2019 - -- Initial release of PaddleDetection and detection model zoo -- Models included: Faster R-CNN, Mask R-CNN, Faster R-CNN+FPN, Mask - R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, YOLOv3, and SSD. - - -## Contributing - -Contributions are highly welcomed and we would really appreciate your feedback!! diff --git a/PaddleCV/PaddleDetection/README_cn.md b/PaddleCV/PaddleDetection/README_cn.md deleted file mode 100644 index 78708bdc19caf8915f9528ea52aea221a7a22656..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/README_cn.md +++ /dev/null @@ -1,124 +0,0 @@ -# **该项目已被迁移到 https://github.com/PaddlePaddle/PaddleDetection** - -[English](README.md) | 简体中文 - -# PaddleDetection - -PaddleDetection的目的是为工业界和学术界提供丰富、易用的目标检测模型。不仅性能优越、易于部署,而且能够灵活的满足算法研究的需求。 - -
- -
- - -## 简介 - -特性: - -- 易部署: - - PaddleDetection的模型中使用的核心算子均通过C++或CUDA实现,同时基于PaddlePaddle的高性能推理引擎可以方便地部署在多种硬件平台上。 - -- 高灵活度: - - PaddleDetection通过模块化设计来解耦各个组件,基于配置文件可以轻松地搭建各种检测模型。 - -- 高性能: - - 基于PaddlePaddle框架的高性能内核,在模型训练速度、显存占用上有一定的优势。例如,YOLOv3的训练速度快于其他框架,在Tesla V100 16GB环境下,Mask-RCNN(ResNet50)可以单卡Batch Size可以达到4 (甚至到5)。 - -支持的模型结构: - -| | ResNet | ResNet-vd [1](#vd) | ResNeXt-vd | SENet | MobileNet | DarkNet | VGG | -|--------------------|:------:|------------------------------:|:----------:|:-----:|:---------:|:-------:|:---:| -| Faster R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ | -| Faster R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | -| Mask R-CNN | ✓ | ✓ | x | ✓ | ✗ | ✗ | ✗ | -| Mask R-CNN + FPN | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | -| Cascade Faster-CNN | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ | -| Cascade Mask-CNN | ✓ | ✗ | ✗ | ✓ | ✗ | ✗ | ✗ | -| RetinaNet | ✓ | ✗ | ✓ | ✗ | ✗ | ✗ | ✗ | -| YOLOv3 | ✓ | ✗ | ✗ | ✗ | ✓ | ✓ | ✗ | -| SSD | ✗ | ✗ | ✗ | ✗ | ✓ | ✗ | ✓ | - -[1] [ResNet-vd](https://arxiv.org/pdf/1812.01187) 模型提供了较大的精度提高和较少的性能损失。 - -扩展特性: - -- [x] **Synchronized Batch Norm**: 目前在YOLOv3中使用。 -- [x] **Group Norm** -- [x] **Modulated Deformable Convolution** -- [x] **Deformable PSRoI Pooling** - -**注意:** Synchronized batch normalization 只能在多GPU环境下使用,不能在CPU环境或者单GPU环境下使用。 - - -## 使用教程 - -- [安装说明](docs/INSTALL_cn.md) -- [快速开始](docs/QUICK_STARTED_cn.md) -- [训练、评估及参数说明](docs/GETTING_STARTED_cn.md) -- [数据预处理及自定义数据集](docs/DATA_cn.md) -- [配置模块设计和介绍](docs/CONFIG_cn.md) -- [详细的配置信息和参数说明示例](docs/config_example/) -- [IPython Notebook demo](demo/mask_rcnn_demo.ipynb) -- [迁移学习教程](docs/TRANSFER_LEARNING_cn.md) - -## 模型库 - -- [模型库](docs/MODEL_ZOO_cn.md) -- [人脸检测模型](configs/face_detection/README.md) -- [行人检测和车辆检测预训练模型](contrib/README_cn.md) - - -## 模型压缩 -- [量化训练压缩示例](slim/quantization) -- [剪枝压缩示例](slim/prune) - -## 推理部署 - -- [模型导出教程](docs/EXPORT_MODEL.md) -- [C++推理部署](inference/README.md) - -## Benchmark - -- [推理Benchmark](docs/BENCHMARK_INFER_cn.md) - - - -## 版本更新 - -### 10/2019 - -- 
增加人脸检测模型BlazeFace、Faceboxes。 -- 丰富基于COCO的模型,精度高达51.9%。 -- 增加Objects365 2019 Challenge上夺冠的最佳单模型之一CACascade-RCNN。 -- 增加行人检测和车辆检测预训练模型。 -- 支持FP16训练。 -- 增加跨平台的C++推理部署方案。 -- 增加模型压缩示例。 - - -### 2/9/2019 -- 增加GroupNorm模型。 -- 增加CascadeRCNN+Mask模型。 - -#### 5/8/2019 -- 增加Modulated Deformable Convolution系列模型。 - -#### 7/22/2019 - -- 增加检测库中文文档 -- 修复R-CNN系列模型训练同时进行评估的问题 -- 新增ResNext101-vd + Mask R-CNN + FPN模型 -- 新增基于VOC数据集的YOLOv3模型 - -#### 7/3/2019 - -- 首次发布PaddleDetection检测库和检测模型库 -- 模型包括:Faster R-CNN, Mask R-CNN, Faster R-CNN+FPN, Mask - R-CNN+FPN, Cascade-Faster-RCNN+FPN, RetinaNet, YOLOv3, 和SSD. - -## 如何贡献代码 - -我们非常欢迎你可以为PaddleDetection提供代码,也十分感谢你的反馈。 diff --git a/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_r50_fpn_1x.yml deleted file mode 100644 index 1d17f53c60e66061ab8a21d624f4191a15ee5a01..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/cascade_mask_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,145 +0,0 @@ -architecture: CascadeMaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 180000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/cascade_mask_rcnn_r50_fpn_1x/model_final/ -num_classes: 81 - -CascadeMaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - mask_assigner: MaskAssigner - mask_head: MaskHead - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - 
num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_hi: [0.5, 0.6, 0.7] - bg_thresh_lo: [0.0, 0.0, 0.0] - fg_fraction: 0.25 - fg_thresh: [0.5, 0.6, 0.7] - -MaskAssigner: - resolution: 28 - -CascadeBBoxHead: - head: CascadeTwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml 
b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml deleted file mode 100644 index 47c089c1d95a35aacdcc0766670dc984cb18dc32..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/cascade_rcnn_r50_fpn_1x/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - variant: b - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: 
CascadeTwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml deleted file mode 100644 index c345aeedbe6f65fbe19aaf87ddbadf5ed567c38d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/cascade_rcnn_r50_fpn_1x_ms_test.yml +++ /dev/null @@ -1,177 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/cascade_rcnn_r50_fpn_1x/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - 
-ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - variant: b - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: CascadeTwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -MultiScaleTEST: - score_thresh: 0.05 - nms_thresh: 0.5 - detections_per_im: 100 - enable_voting: true - vote_thresh: 0.9 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - 
annotation: annotations/instances_val2017.json - image_dir: val2017 - sample_transforms: - - !DecodeImage - to_rgb: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: - - 0.485 - - 0.456 - - 0.406 - std: - - 0.229 - - 0.224 - - 0.225 - - !MultiscaleTestResize - origin_target_size: 800 - origin_max_size: 1333 - target_size: - - 400 - - 500 - - 600 - - 700 - - 900 - - 1000 - - 1100 - - 1200 - max_size: 2000 - use_flip: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadMSTest - pad_to_stride: 32 - num_scale: 18 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml deleted file mode 100755 index 6d666470f6969c1d4d3a7a1c2a16f11642c904d6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml +++ /dev/null @@ -1,257 +0,0 @@ -architecture: CascadeMaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 300000 -snapshot_iter: 10 -use_gpu: true -log_iter: 20 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar -weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/ -metric: COCO -num_classes: 81 - -CascadeMaskRCNN: - backbone: SENet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - mask_assigner: MaskAssigner - mask_head: MaskHead - -SENet: - depth: 152 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - freeze_norm: True 
- variant: d - dcn_v2_stages: [3, 4, 5] - std_senet: True - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - freeze_norm: False - norm_type: gn - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - norm_type: gn - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_hi: [0.5, 0.6, 0.7] - bg_thresh_lo: [0.0, 0.0, 0.0] - fg_fraction: 0.25 - fg_thresh: [0.5, 0.6, 0.7] - -MaskAssigner: - resolution: 28 - -CascadeBBoxHead: - head: CascadeXConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeXConvNormHead: - norm_type: gn - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 280000] - - !LinearWarmup - start_factor: 0.01 - steps: 2000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !RandomFlipImage - is_mask_flip: true - is_normalized: false - prob: 0.5 - - !NormalizeImage - is_channel_first: false - 
is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !ResizeImage - interp: 1 - target_size: - - 416 - - 448 - - 480 - - 512 - - 544 - - 576 - - 608 - - 640 - - 672 - - 704 - - 736 - - 768 - - 800 - - 832 - - 864 - - 896 - - 928 - - 960 - - 992 - - 1024 - - 1056 - - 1088 - - 1120 - - 1152 - - 1184 - - 1216 - - 1248 - - 1280 - - 1312 - - 1344 - - 1376 - - 1408 - max_size: 1600 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 8 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !ResizeImage - interp: 1 - target_size: - - 800 - max_size: 1333 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml deleted file mode 100644 index ea0b375b6f87cbbc358b3b7ec213cdab16661e47..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml +++ 
/dev/null @@ -1,272 +0,0 @@ -architecture: CascadeMaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 300000 -snapshot_iter: 10000 -use_gpu: true -log_iter: 20 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_caffe_pretrained.tar -weights: output/cascade_mask_rcnn_dcn_se154_vd_fpn_gn_s1x/model_final/ -metric: COCO -num_classes: 81 - -CascadeMaskRCNN: - backbone: SENet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - mask_assigner: MaskAssigner - mask_head: MaskHead - -SENet: - depth: 152 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - freeze_norm: True - variant: d - dcn_v2_stages: [3, 4, 5] - std_senet: True - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - freeze_norm: False - norm_type: gn - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - norm_type: gn - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_hi: [0.5, 0.6, 0.7] - bg_thresh_lo: [0.0, 0.0, 0.0] - fg_fraction: 0.25 - fg_thresh: [0.5, 0.6, 0.7] - -MaskAssigner: - 
resolution: 28 - -CascadeBBoxHead: - head: CascadeXConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeXConvNormHead: - norm_type: gn - -MultiScaleTEST: - score_thresh: 0.05 - nms_thresh: 0.5 - detections_per_im: 100 - enable_voting: true - vote_thresh: 0.9 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 280000] - - !LinearWarmup - start_factor: 0.01 - steps: 2000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !RandomFlipImage - is_mask_flip: true - is_normalized: false - prob: 0.5 - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !ResizeImage - interp: 1 - target_size: - - 416 - - 448 - - 480 - - 512 - - 544 - - 576 - - 608 - - 640 - - 672 - - 704 - - 736 - - 768 - - 800 - - 832 - - 864 - - 896 - - 928 - - 960 - - 992 - - 1024 - - 1056 - - 1088 - - 1120 - - 1152 - - 1184 - - 1216 - - 1248 - - 1280 - - 1312 - - 1344 - - 1376 - - 1408 - max_size: 1600 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 8 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - sample_transforms: - - !DecodeImage - to_rgb: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !MultiscaleTestResize - origin_target_size: 800 - origin_max_size: 1333 - target_size: - - 400 - - 500 - - 600 - - 700 - - 900 - - 1000 - - 
1100 - - 1200 - max_size: 2000 - use_flip: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadMSTest - pad_to_stride: 32 - # num_scale = (len(target_size) + 1) * (1 + use_flip) - num_scale: 18 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - sample_transforms: - - !DecodeImage - to_rgb: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml deleted file mode 100644 index 93373adb3a7f72b64ba45996ef61c2a9d3da8414..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml +++ /dev/null @@ -1,139 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/cascade_rcnn_dcn_r101_vd_fpn_1x/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - -ResNet: - norm_type: bn - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] 
- variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: CascadeTwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml 
deleted file mode 100644 index 4c74bd877b644659812bbcab960a4ce4600277ef..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml +++ /dev/null @@ -1,139 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/cascade_rcnn_dcn_r50_fpn_1x/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - -ResNet: - norm_type: bn - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - variant: b - dcn_v2_stages: [3, 4, 5] - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: CascadeTwoFCHead - nms: - 
keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index dbbe2d8014d716e68ba481f4b66eb7fe50164356..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,141 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: 
CascadeBBoxAssigner - -ResNeXt: - norm_type: bn - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: CascadeTwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 
- batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml deleted file mode 100644 index d6a949efb1e68ae7c258e13f343d46e81746f77b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x.yml +++ /dev/null @@ -1,139 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/faster_rcnn_dcn_r101_vd_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - 
pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 2 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml deleted file mode 100644 index 2048d61e46d4ce93ffb1c0d3bfd1075be2ea4472..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_fpn_1x.yml +++ /dev/null @@ -1,138 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -use_gpu: true -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: 
https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/faster_rcnn_dcn_r50_fpn_1x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - norm_type: bn - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - dcn_v2_stages: [3, 4, 5] - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_lo: 0.0 - bg_thresh_hi: 0.5 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - 
pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml deleted file mode 100644 index f0cb0ba27d31247415a479619d63da265b16df27..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x.yml +++ /dev/null @@ -1,139 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar -weights: output/faster_rcnn_dcn_r50_vd_fpn_2x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - 
rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 2 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index b6d7b81cc3cc1944e71b0159bec1bc56c35dcf53..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,143 +0,0 @@ -architecture: FasterRCNN 
-train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - 
-OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: true - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: false diff --git a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml deleted file mode 100644 index f5a0b7c458a0a0856a98533a047407f58c684adc..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x.yml +++ /dev/null @@ -1,146 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/mask_rcnn_dcn_r101_vd_fpn_1x/model_final -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - 
aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git 
a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml deleted file mode 100644 index 04653001e9bc27bc247fe420076bd12923199263..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_fpn_1x.yml +++ /dev/null @@ -1,145 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 180000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/mask_rcnn_dcn_r50_fpn_1x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - 
fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml deleted file mode 100644 index d008fb84dcd24d0853a53717000a4b6578002564..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x.yml +++ /dev/null @@ -1,147 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 360000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar -metric: COCO -weights: output/mask_rcnn_dcn_r50_vd_fpn_2x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - 
bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - 
batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index 8076d1a5d4c209e174cf02d1a5dd36c3716456f9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,148 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -log_iter: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - variant: d - dcn_v2_stages: [3, 4, 5] - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - 
-FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/face_detection/README.md b/PaddleCV/PaddleDetection/configs/face_detection/README.md deleted file mode 100644 index b5e2119f8148dfa3a0ed5930ca285cce50236e2e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/face_detection/README.md +++ /dev/null @@ -1,261 +0,0 @@ -English | [简体中文](README_cn.md) - -# FaceDetection -The goal of FaceDetection is to provide efficient and high-speed face detection solutions, -including cutting-edge and classic models. - - -
- -
- -## Data Pipeline -We use the [WIDER FACE dataset](http://shuoyang1213.me/WIDERFACE/) to carry out the training -and testing of the model; the official website gives a detailed introduction to the data. -- WIDER Face data source: -Loads `wider_face` type dataset with directory structures like this: - - ``` - dataset/wider_face/ - ├── wider_face_split - │ ├── wider_face_train_bbx_gt.txt - │ ├── wider_face_val_bbx_gt.txt - ├── WIDER_train - │ ├── images - │ │ ├── 0--Parade - │ │ │ ├── 0_Parade_marchingband_1_100.jpg - │ │ │ ├── 0_Parade_marchingband_1_381.jpg - │ │ │ │ ... - │ │ ├── 10--People_Marching - │ │ │ ... - ├── WIDER_val - │ ├── images - │ │ ├── 0--Parade - │ │ │ ├── 0_Parade_marchingband_1_1004.jpg - │ │ │ ├── 0_Parade_marchingband_1_1045.jpg - │ │ │ │ ... - │ │ ├── 10--People_Marching - │ │ │ ... - ``` - -- Download dataset manually: -To download the WIDER FACE dataset, run the following commands: -``` -cd dataset/wider_face && ./download.sh -``` - -- Download dataset automatically: -If a training session is started but the dataset is not set up properly -(e.g., not found in dataset/wider_face), PaddleDetection can automatically -download it from [WIDER FACE dataset](http://shuoyang1213.me/WIDERFACE/); -the decompressed dataset will be cached in ~/.cache/paddle/dataset/ and can be discovered -automatically subsequently. - -### Data Augmentation - -- **Data-anchor-sampling:** Randomly transform the scale of the image to a certain range of scales, -greatly enhancing the scale change of the face. The specific operation is to obtain $v=\sqrt{width * height}$ -according to the randomly selected face height and width, and determine which interval of - `[16,32,64,128]` the value of `v` falls in. 
Assuming `v=45` && `32[1](#lite) | NAS [2](#nas) | -|:------------------------:|:--------:|:--------------------------:|:------------------------:| -| [BlazeFace](#BlazeFace) | ✓ | ✓ | ✓ | -| [FaceBoxes](#FaceBoxes) | ✓ | ✓ | x | - -[1] `Lite` edition means reducing the number of network layers and channels. -[2] `NAS` edition means using the `Neural Architecture Search` algorithm to -optimize the network structure. - -**Todo List:** -- [ ] HamBox -- [ ] Pyramidbox - -### Model Zoo - -#### mAP in WIDER FACE - -| Architecture | Type | Size | Img/gpu | Lr schd | Easy Set | Medium Set | Hard Set | Download | -|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:| -| BlazeFace | Original | 640 | 8 | 32w | **0.915** | **0.892** | **0.797** | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_original.tar) | -| BlazeFace | Lite | 640 | 8 | 32w | 0.909 | 0.885 | 0.781 | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_lite.tar) | -| BlazeFace | NAS | 640 | 8 | 32w | 0.837 | 0.807 | 0.658 | [model](https://paddlemodels.bj.bcebos.com/object_detection/blazeface_nas.tar) | -| FaceBoxes | Original | 640 | 8 | 32w | 0.875 | 0.848 | 0.568 | [model](https://paddlemodels.bj.bcebos.com/object_detection/faceboxes_original.tar) | -| FaceBoxes | Lite | 640 | 8 | 32w | 0.898 | 0.872 | 0.752 | [model](https://paddlemodels.bj.bcebos.com/object_detection/faceboxes_lite.tar) | - -**NOTES:** -- Get mAP in `Easy/Medium/Hard Set` by multi-scale evaluation in `tools/face_eval.py`. -For details, refer to [Evaluation](#Evaluate-on-the-WIDER-FACE). -- BlazeFace-Lite training and testing use the [blazeface.yml](../../configs/face_detection/blazeface.yml) -config file with `lite_edition: true` set. 
- -#### mAP in FDDB - -| Architecture | Type | Size | DistROC | ContROC | -|:------------:|:--------:|:----:|:-------:|:-------:| -| BlazeFace | Original | 640 | **0.992** | **0.762** | -| BlazeFace | Lite | 640 | 0.990 | 0.756 | -| BlazeFace | NAS | 640 | 0.981 | 0.741 | -| FaceBoxes | Original | 640 | 0.985 | 0.731 | -| FaceBoxes | Lite | 640 | 0.987 | 0.741 | - -**NOTES:** -- Get mAP by multi-scale evaluation on the FDDB dataset. -For details can refer to [Evaluation](#Evaluate-on-the-FDDB). - -#### Infer Time and Model Size comparison - -| Architecture | Type | Size | P4 (ms) | CPU (ms) | ARM (ms) | File size (MB) | Flops | -|:------------:|:--------:|:----:|:---------:|:--------:|:----------:|:--------------:|:---------:| -| BlazeFace | Original | 128 | - | - | - | - | - | -| BlazeFace | Lite | 128 | - | - | - | - | - | -| BlazeFace | NAS | 128 | - | - | - | - | - | -| FaceBoxes | Original | 128 | - | - | - | - | - | -| FaceBoxes | Lite | 128 | - | - | - | - | - | -| BlazeFace | Original | 320 | - | - | - | - | - | -| BlazeFace | Lite | 320 | - | - | - | - | - | -| BlazeFace | NAS | 320 | - | - | - | - | - | -| FaceBoxes | Original | 320 | - | - | - | - | - | -| FaceBoxes | Lite | 320 | - | - | - | - | - | -| BlazeFace | Original | 640 | - | - | - | - | - | -| BlazeFace | Lite | 640 | - | - | - | - | - | -| BlazeFace | NAS | 640 | - | - | - | - | - | -| FaceBoxes | Original | 640 | - | - | - | - | - | -| FaceBoxes | Lite | 640 | - | - | - | - | - | - - -**NOTES:** -- CPU: i5-7360U @ 2.30GHz. Single core and single thread. 
- - - -## Get Started -For `Training` and `Inference`, please refer to [GETTING_STARTED.md](../../docs/GETTING_STARTED.md) -- **NOTES:** -- `BlazeFace` and `FaceBoxes` are trained on 4 GPUs with `batch_size=8` per GPU (total batch size of 32) -for 320000 iters. (If your GPU count is not 4, please refer to the rule of training parameters -in the table of [calculation rules](../../docs/GETTING_STARTED.md#faq)) -- Currently we do not support evaluation in training. - -### Evaluation -``` -export CUDA_VISIBLE_DEVICES=0 -export PYTHONPATH=$PYTHONPATH:. -python tools/face_eval.py -c configs/face_detection/blazeface.yml -``` -- Optional arguments -- `-d` or `--dataset_dir`: Dataset path, same as dataset_dir of configs. Such as: `-d dataset/wider_face`. -- `-f` or `--output_eval`: Evaluation file directory, default is `output/pred`. -- `-e` or `--eval_mode`: Evaluation mode, either `widerface` or `fddb`, default is `widerface`. -- `--multi_scale`: If this flag is added to the command, `multi_scale` evaluation is selected. -Default is `False`, which selects `single-scale` evaluation. - -After the evaluation is completed, the test result in txt format will be generated in `output/pred`, -and then mAP will be calculated according to different data sets. If you set `--eval_mode=widerface`, -it will [Evaluate on the WIDER FACE](#Evaluate-on-the-WIDER-FACE). If you set `--eval_mode=fddb`, -it will [Evaluate on the FDDB](#Evaluate-on-the-FDDB). - -#### Evaluate on the WIDER FACE -- Download the official evaluation script to evaluate the AP metrics: -``` -wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip -unzip eval_tools.zip && rm -f eval_tools.zip -``` -- Modify the result path and the name of the curve to be drawn in `eval_tools/wider_eval.m`: -``` -# Modify the folder name where the result is stored. 
-pred_dir = './pred'; -# Modify the name of the curve to be drawn -legend_name = 'Fluid-BlazeFace'; -``` -- `wider_eval.m` is the main execution program of the evaluation module. The run command is as follows: -``` -matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;" -``` - -#### Evaluate on the FDDB -For details of the [FDDB dataset](http://vis-www.cs.umass.edu/fddb/), refer to FDDB's official website. -- Download the official dataset and evaluation script to evaluate the ROC metrics: -``` -#external link to the Faces in the Wild data set -wget http://tamaraberg.com/faceDataset/originalPics.tar.gz -#The annotations are split into ten folds. See README for details. -wget http://vis-www.cs.umass.edu/fddb/FDDB-folds.tgz -#information on directory structure and file formats -wget http://vis-www.cs.umass.edu/fddb/README.txt -``` -- Install OpenCV: Requires [OpenCV library](http://sourceforge.net/projects/opencvlibrary/) -If the utility 'pkg-config' is not available for your operating system, -edit the Makefile to manually specify the OpenCV flags as follows: -``` -INCS = -I/usr/local/include/opencv -LIBS = -L/usr/local/lib -lcxcore -lcv -lhighgui -lcvaux -lml -``` - -- Compile FDDB evaluation code: execute `make` in evaluation folder. - -- Generate full image path list and groundtruth in FDDB-folds. The run command is as follows: -``` -cat `ls|grep -v "ellipse"` > filePath.txt && cat *ellipse* > fddb_annotFile.txt -``` -- Evaluation -The final evaluation command is: -``` -./evaluate -a ./FDDB/FDDB-folds/fddb_annotFile.txt \ - -d DETECTION_RESULT.txt -f 0 \ - -i ./FDDB -l ./FDDB/FDDB-folds/filePath.txt \ - -r ./OUTPUT_DIR -z .jpg -``` -**NOTES:** The meaning of each argument can be shown with `./evaluate --help`. - -## Algorithm Description - -### BlazeFace -**Introduction:** -[BlazeFace](https://arxiv.org/abs/1907.05047) is a face detection model published by Google Research. -It is lightweight yet performs well, and is tailored for mobile GPU inference. 
It runs at a speed -of 200-1000+ FPS on flagship devices. - -**Particularity:** -- Anchor scheme stops at 8×8(input 128x128), 6 anchors per pixel at that resolution. -- 5 single, and 6 double BlazeBlocks: 5×5 depthwise convs, same accuracy with fewer layers. -- Replace the non-maximum suppression algorithm with a blending strategy that estimates the -regression parameters of a bounding box as a weighted mean between the overlapping predictions. - -**Edition information:** -- Original: Reference original paper reproduction. -- Lite: Replace 5x5 conv with 3x3 conv, fewer network layers and conv channels. -- NAS: use the `Neural Architecture Search` algorithm to optimize the network structure, -with fewer network layers and conv channels than `Lite`. - -### FaceBoxes -**Introduction:** -[FaceBoxes](https://arxiv.org/abs/1708.05234), titled "A CPU Real-time Face Detector -with High Accuracy", is a face detector proposed by Shifeng Zhang, with high performance on -both speed and accuracy. The paper was published at IJCB (2017). - -**Particularity:** -- Anchor scheme stops at 20x20, 10x10, 5x5 for a network input size of 640x640, -including 3, 1, 1 anchors per pixel at each resolution. The corresponding densities -are 1, 2, 4(20x20), 4(10x10) and 4(5x5). -- 2 convs with CReLU, 2 poolings, 3 inceptions and 2 convs with ReLU. -- Use density prior box to improve detection accuracy. - -**Edition information:** -- Original: Reference original paper reproduction. -- Lite: 2 convs with CReLU, 1 pooling, 2 convs with ReLU, 3 inceptions and 2 convs with ReLU. -Anchor scheme stops at 80x80 and 40x40, including 3, 1 anchors per pixel at each resolution. -The corresponding densities are 1, 2, 4(80x80) and 4(40x40), using fewer conv channels than the original. - - -## Contributing -Contributions are highly welcomed and we would really appreciate your feedback!! 
diff --git a/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml b/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml deleted file mode 100644 index 692f14a7cc8091bc8df1f5edbfbca2a9c59b0073..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/face_detection/blazeface.yml +++ /dev/null @@ -1,130 +0,0 @@ -architecture: BlazeFace -max_iters: 320000 -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -pretrain_weights: -use_gpu: true -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: WIDERFACE -save_dir: output -weights: output/blazeface/model_final/ -# 1(label_class) + 1(background) -num_classes: 2 - -BlazeFace: - backbone: BlazeNet - output_decoder: - keep_top_k: 750 - nms_threshold: 0.3 - nms_top_k: 5000 - score_threshold: 0.01 - min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]] - use_density_prior_box: false - -BlazeNet: - with_extra_blocks: true - lite_edition: false - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 300000] - -OptimizerBuilder: - optimizer: - momentum: 0.0 - type: RMSPropOptimizer - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - use_process: True - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_train_bbx_gt.txt - image_dir: WIDER_train/images - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - prob: 0.5 - - !CropImageWithDataAchorSampling - anchor_sampler: - - [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0] - batch_sampler: - - [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 
1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - target_size: 640 - - !RandomInterpImage - target_size: 640 - - !RandomFlipImage - is_normalized: true - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDEvalFeed: - batch_size: 1 - use_process: false - fields: ['image', 'im_id', 'gt_box'] - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_val_bbx_gt.txt - image_dir: WIDER_val/images - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDTestFeed: - batch_size: 1 - use_process: false - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] diff --git a/PaddleCV/PaddleDetection/configs/face_detection/blazeface_nas.yml b/PaddleCV/PaddleDetection/configs/face_detection/blazeface_nas.yml deleted file mode 100644 index 45356bda7998c18b286edaa4e308f21875cce9d3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/face_detection/blazeface_nas.yml +++ /dev/null @@ -1,132 +0,0 @@ -architecture: BlazeFace -max_iters: 320000 -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -pretrain_weights: -use_gpu: true -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: WIDERFACE -save_dir: output -weights: output/blazeface_nas/model_final/ -# 1(label_class) + 1(background) -num_classes: 2 - -BlazeFace: - backbone: BlazeNet - output_decoder: - keep_top_k: 750 - 
nms_threshold: 0.3 - nms_top_k: 5000 - score_threshold: 0.01 - min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]] - use_density_prior_box: false - -BlazeNet: - blaze_filters: [[12, 12], [12, 12, 2], [12, 12]] - double_blaze_filters: [[12, 16, 24, 2], [24, 12, 24], [24, 16, 72, 2], [72, 12, 72]] - with_extra_blocks: true - lite_edition: false - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 300000] - -OptimizerBuilder: - optimizer: - momentum: 0.0 - type: RMSPropOptimizer - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - use_process: True - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_train_bbx_gt.txt - image_dir: WIDER_train/images - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - prob: 0.5 - - !CropImageWithDataAchorSampling - anchor_sampler: - - [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0] - batch_sampler: - - [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - target_size: 640 - - !RandomInterpImage - target_size: 640 - - !RandomFlipImage - is_normalized: true - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDEvalFeed: - batch_size: 1 - use_process: false - fields: ['image', 'im_id', 'gt_box'] - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_val_bbx_gt.txt - image_dir: WIDER_val/images - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - 
- !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDTestFeed: - batch_size: 1 - use_process: false - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] diff --git a/PaddleCV/PaddleDetection/configs/face_detection/faceboxes.yml b/PaddleCV/PaddleDetection/configs/face_detection/faceboxes.yml deleted file mode 100644 index b278723292ddf51e7a93a88b59f3b757f5d4455e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/face_detection/faceboxes.yml +++ /dev/null @@ -1,130 +0,0 @@ -architecture: FaceBoxes -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -pretrain_weights: -use_gpu: true -max_iters: 320000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: WIDERFACE -save_dir: output -weights: output/faceboxes/model_final/ -# 1(label_class) + 1(background) -num_classes: 2 - -FaceBoxes: - backbone: FaceBoxNet - densities: [[4, 2, 1], [1], [1]] - fixed_sizes: [[32., 64., 128.], [256.], [512.]] - output_decoder: - keep_top_k: 750 - nms_threshold: 0.3 - nms_top_k: 5000 - score_threshold: 0.01 - -FaceBoxNet: - with_extra_blocks: true - lite_edition: false - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 300000] - -OptimizerBuilder: - optimizer: - momentum: 0.0 - type: RMSPropOptimizer - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - use_process: True - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_train_bbx_gt.txt - image_dir: WIDER_train/images - image_shape: [3, 
640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - prob: 0.5 - - !CropImageWithDataAchorSampling - anchor_sampler: - - [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0] - batch_sampler: - - [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - target_size: 640 - - !RandomInterpImage - target_size: 640 - - !RandomFlipImage - is_normalized: true - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDEvalFeed: - batch_size: 1 - use_process: false - fields: ['image', 'im_id', 'gt_box'] - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_val_bbx_gt.txt - image_dir: WIDER_val/images - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDTestFeed: - batch_size: 1 - use_process: false - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] diff --git a/PaddleCV/PaddleDetection/configs/face_detection/faceboxes_lite.yml b/PaddleCV/PaddleDetection/configs/face_detection/faceboxes_lite.yml deleted file mode 100644 index 
157f0337e4fbda281b3c2fe9cdfd85dd81b51b40..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/face_detection/faceboxes_lite.yml +++ /dev/null @@ -1,130 +0,0 @@ -architecture: FaceBoxes -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -pretrain_weights: -use_gpu: true -max_iters: 320000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: WIDERFACE -save_dir: output -weights: output/faceboxes_lite/model_final/ -# 1(label_class) + 1(background) -num_classes: 2 - -FaceBoxes: - backbone: FaceBoxNet - densities: [[2, 1, 1], [1, 1]] - fixed_sizes: [[16., 32., 64.], [96., 128.]] - output_decoder: - keep_top_k: 750 - nms_threshold: 0.3 - nms_top_k: 5000 - score_threshold: 0.01 - -FaceBoxNet: - with_extra_blocks: true - lite_edition: true - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 300000] - -OptimizerBuilder: - optimizer: - momentum: 0.0 - type: RMSPropOptimizer - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - use_process: True - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_train_bbx_gt.txt - image_dir: WIDER_train/images - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - prob: 0.5 - - !CropImageWithDataAchorSampling - anchor_sampler: - - [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0] - batch_sampler: - - [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0] - target_size: 640 - - !RandomInterpImage - target_size: 640 - - !RandomFlipImage - is_normalized: true - - !Permute {} - - 
!NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDEvalFeed: - batch_size: 1 - use_process: false - fields: ['image', 'im_id', 'gt_box'] - dataset: - dataset_dir: dataset/wider_face - annotation: wider_face_split/wider_face_val_bbx_gt.txt - image_dir: WIDER_val/images - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] - -SSDTestFeed: - batch_size: 1 - use_process: false - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 640, 640] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - target_size: 640 - use_cv2: false - - !Permute {} - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [127.502231, 127.502231, 127.502231] diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_1x.yml deleted file mode 100644 index c72c34d4da8ce429932069d5084f4e710ddba11b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_1x.yml +++ /dev/null @@ -1,115 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -use_gpu: true -max_iters: 180000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r101_1x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 101 - feature_maps: 4 - freeze_at: 2 
- -ResNetC5: - depth: 101 - norm_type: affine_channel - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - use_random: true - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: ResNetC5 - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_1x.yml deleted file mode 100644 index c11d6f2141a9b15c6a3ef2c50055fbc753338d53..0000000000000000000000000000000000000000 --- 
a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_1x.yml +++ /dev/null @@ -1,136 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar -weights: output/faster_rcnn_r101_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 
0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_2x.yml deleted file mode 100644 index 29838c78b60bddb7a92193088354fe0956e2d14b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_fpn_2x.yml +++ /dev/null @@ -1,136 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar -weights: output/faster_rcnn_r101_fpn_2x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - 
stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_1x.yml deleted file mode 
100644 index 2ef717ffc846d325a6e3f3c9b78752250d692f9d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_1x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/faster_rcnn_r101_vd_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - 
schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_2x.yml deleted file mode 100644 index 763d447c8cfb59c6fcd3045b0e8a34b8da38e73e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r101_vd_fpn_2x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/faster_rcnn_r101_vd_fpn_2x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: 
[0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git 
a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_1x.yml deleted file mode 100644 index 12d349612b3464cff0b945535a8d81a53b434a98..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_1x.yml +++ /dev/null @@ -1,115 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -use_gpu: true -max_iters: 180000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r50_1x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: 4 - freeze_at: 2 - -ResNetC5: - depth: 50 - norm_type: affine_channel - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - use_random: true - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: ResNetC5 - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 
0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_2x.yml deleted file mode 100644 index 255cd02663845235beac103dad7faaaf03bb90f2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_2x.yml +++ /dev/null @@ -1,115 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -use_gpu: true -max_iters: 360000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 10000 -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r50_2x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: 4 - freeze_at: 2 - -ResNetC5: - depth: 50 - norm_type: affine_channel - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - use_random: true - train_proposal: - min_size: 
0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: ResNetC5 - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml deleted file mode 100644 index c719106104f1424008db3a079e2e1ac7a3d742b9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -use_gpu: true -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: 
output/faster_rcnn_r50_fpn_1x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: bn - norm_decay: 0. - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_lo: 0.0 - bg_thresh_hi: 0.5 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - 
dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_2x.yml deleted file mode 100644 index 12ae624f6c642ff439a21a90a4f52d1da046c164..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_fpn_2x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -use_gpu: true -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r50_fpn_2x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: affine_channel - norm_decay: 0. 
- depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_lo: 0.0 - bg_thresh_hi: 0.5 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: 
dataset/coco/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml deleted file mode 100644 index f39a144a431f5998a8178c41c10ade796d270cb6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_1x.yml +++ /dev/null @@ -1,117 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -use_gpu: true -max_iters: 180000 -log_smooth_window: 20 -save_dir: output/faster-r50-vd-c4-1x -snapshot_iter: 10000 -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r50_vd_1x/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: 4 - freeze_at: 2 - variant: d - -ResNetC5: - depth: 50 - norm_type: affine_channel - variant: d - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - use_random: true - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - sampling_ratio: 0 - spatial_scale: 0.0625 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: ResNetC5 - nms: 
- keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_fpn_2x.yml deleted file mode 100644 index 4b944ef9398b3dfebdc4f3731b8ef2522d103e22..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_r50_vd_fpn_2x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar -weights: output/faster_rcnn_r50_vd_fpn_2x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 
64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 2 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_se154_vd_fpn_s1x.yml 
b/PaddleCV/PaddleDetection/configs/faster_rcnn_se154_vd_fpn_s1x.yml deleted file mode 100644 index c3dd761e6637f568c32d30af2300d87fe5f600ac..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_se154_vd_fpn_s1x.yml +++ /dev/null @@ -1,139 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 260000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_pretrained.tar -weights: output/faster_rcnn_se154_vd_fpn_s1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: SENet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -SENet: - depth: 152 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - 
keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [200000, 240000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index adb607b6e022f3c7c66c121922e2d28e4ba3e1d0..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,142 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: 
[2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - values: null - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: true - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - 
-FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: false diff --git a/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_2x.yml deleted file mode 100644 index ee36efbe859ab42d7391d2867bf34d45b20b340f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/faster_rcnn_x101_vd_64x4d_fpn_2x.yml +++ /dev/null @@ -1,141 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -FasterRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - -FPNRoIAlign: - 
canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: true - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - shuffle: false diff --git a/PaddleCV/PaddleDetection/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml b/PaddleCV/PaddleDetection/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml deleted file mode 100644 index 52c61ad4b57bf1464fe9e2816cec710563a9d707..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml +++ /dev/null @@ -1,147 +0,0 @@ -architecture: CascadeMaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: 
https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/cascade_mask_rcnn_r50_fpn_gn_2x/model_final/ -metric: COCO -num_classes: 81 - -CascadeMaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - mask_head: MaskHead - mask_assigner: MaskAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - norm_type: gn - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - norm_type: gn - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_hi: [0.5, 0.6, 0.7] - bg_thresh_lo: [0.0, 0.0, 0.0] - fg_fraction: 0.25 - fg_thresh: [0.5, 0.6, 0.7] - -MaskAssigner: - resolution: 28 - -CascadeBBoxHead: - head: CascadeXConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeXConvNormHead: - norm_type: gn - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum 
- regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/gn/faster_rcnn_r50_fpn_gn_2x.yml b/PaddleCV/PaddleDetection/configs/gn/faster_rcnn_r50_fpn_gn_2x.yml deleted file mode 100644 index a86deb4f6efab685cce14a329bd3b79f14ba36d8..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/gn/faster_rcnn_r50_fpn_gn_2x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: FasterRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/faster_rcnn_r50_fpn_gn/model_final -num_classes: 81 - -FasterRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - norm_type: gn - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - 
max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_lo: 0.0 - bg_thresh_hi: 0.5 - fg_fraction: 0.25 - fg_thresh: 0.5 - -BBoxHead: - head: XConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -XConvNormHead: - norm_type: gn - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 16 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/gn/mask_rcnn_r50_fpn_gn_2x.yml b/PaddleCV/PaddleDetection/configs/gn/mask_rcnn_r50_fpn_gn_2x.yml deleted file mode 100644 index bffe3ba45a78d5ddcbeb20fa15a85ebd9f034532..0000000000000000000000000000000000000000 --- 
a/PaddleCV/PaddleDetection/configs/gn/mask_rcnn_r50_fpn_gn_2x.yml +++ /dev/null @@ -1,145 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/mask_rcnn_r50_fpn_gn_2x/model_final/ -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - norm_type: gn - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - norm_type: gn - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: XConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -XConvNormHead: - norm_type: gn - -LearningRate: - base_lr: 0.01 - schedulers: - - 
!PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_fpn_1x.yml deleted file mode 100644 index 12229a074fbcf1549a3d7581fcd91bd6c124d516..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_fpn_1x.yml +++ /dev/null @@ -1,143 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 180000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r101_fpn_1x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] 
- variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_vd_fpn_1x.yml 
b/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_vd_fpn_1x.yml deleted file mode 100644 index 91bb40c2f0d920ca1961e93a5e7adf804ff28d1b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r101_vd_fpn_1x.yml +++ /dev/null @@ -1,144 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar -weights: output/mask_rcnn_r101_vd_fpn_1x/model_final -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead 
- nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_1x.yml deleted file mode 100644 index 6c3dd8418e55c899c1ea0e0bc7db1d39d9a0be9d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_1x.yml +++ /dev/null @@ -1,123 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 180000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r50_1x/model_final -num_classes: 81 - -MaskRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_assigner: BBoxAssigner - bbox_head: BBoxHead - mask_assigner: MaskAssigner - mask_head: MaskHead - -ResNet: - norm_type: affine_channel - norm_decay: 0. 
- depth: 50 - feature_maps: 4 - freeze_at: 2 - -ResNetC5: - depth: 50 - norm_type: affine_channel - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - spatial_scale: 0.0625 - sampling_ratio: 0 - -BBoxHead: - head: ResNetC5 - nms: - keep_top_k: 100 - nms_threshold: 0.5 - normalized: false - score_threshold: 0.05 - -MaskHead: - dilation: 1 - conv_dim: 256 - resolution: 14 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 14 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_2x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_2x.yml deleted file mode 100644 index 
091b0cf89bfe44e74ac807bfb0f1a7a8ea1a6454..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_2x.yml +++ /dev/null @@ -1,125 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 360000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r50_2x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - rpn_head: RPNHead - roi_extractor: RoIAlign - bbox_assigner: BBoxAssigner - bbox_head: BBoxHead - mask_assigner: MaskAssigner - mask_head: MaskHead - - -ResNet: - norm_type: affine_channel - norm_decay: 0. - depth: 50 - feature_maps: 4 - freeze_at: 2 - -ResNetC5: - depth: 50 - norm_type: affine_channel - -RPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 12000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 6000 - post_nms_top_n: 1000 - -RoIAlign: - resolution: 14 - spatial_scale: 0.0625 - sampling_ratio: 0 - -BBoxHead: - head: ResNetC5 - nms: - keep_top_k: 100 - nms_threshold: 0.5 - normalized: false - score_threshold: 0.05 - -MaskHead: - dilation: 1 - conv_dim: 256 - resolution: 14 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 14 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - #start the warm up from 
base_lr * start_factor - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml deleted file mode 100644 index a889ea283f445974e11d38479162b388d69ec3ad..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,143 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 180000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r50_fpn_1x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: bn - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - 
rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_2x.yml deleted file mode 100644 index 08977bba39b688920f427442c01510f93efa412b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_fpn_2x.yml +++ /dev/null 
@@ -1,143 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/mask_rcnn_r50_fpn_2x/model_final/ -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 
- -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_vd_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_vd_fpn_2x.yml deleted file mode 100644 index 12a5057528eaada329d70058acf3aa9a13727237..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_r50_vd_fpn_2x.yml +++ /dev/null @@ -1,145 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -use_gpu: true -max_iters: 360000 -snapshot_iter: 10000 -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar -metric: COCO -weights: output/mask_rcnn_r50_vd_fpn_2x/model_final/ -num_classes: 81 - -MaskRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNet: - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 
- rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_se154_vd_fpn_s1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_se154_vd_fpn_s1x.yml deleted file mode 
100644 index 4430055712504bfe79252c1614792d05fb46b89b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_se154_vd_fpn_s1x.yml +++ /dev/null @@ -1,147 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 260000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_pretrained.tar -weights: output/mask_rcnn_se154_vd_fpn_s1x/model_final/ -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: SENet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -SENet: - depth: 152 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - box_resolution: 7 - sampling_ratio: 2 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - 
score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [200000, 240000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - # batch size per device - batch_size: 1 - dataset: - dataset_dir: dataset/coco - image_dir: train2017 - annotation: annotations/instances_train2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index 75653ce2462e91b3be3290c5e6a5a3c12d816d71..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,146 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 180000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x/model_final -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - 
norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: 
annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_2x.yml b/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_2x.yml deleted file mode 100644 index c5a711363330092aee30c2b4d9b08dd899f97beb..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/mask_rcnn_x101_vd_64x4d_fpn_2x.yml +++ /dev/null @@ -1,146 +0,0 @@ -architecture: MaskRCNN -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed -max_iters: 360000 -snapshot_iter: 10000 -use_gpu: true -log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x/model_final -metric: COCO -num_classes: 81 - -MaskRCNN: - backbone: ResNeXt - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: BBoxHead - bbox_assigner: BBoxAssigner - -ResNeXt: - depth: 101 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: affine_channel - variant: d - -FPN: - max_level: 6 - min_level: 2 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - aspect_ratios: [0.5, 1.0, 2.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - max_level: 6 - min_level: 2 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - max_level: 5 - min_level: 2 - sampling_ratio: 2 - box_resolution: 7 - mask_resolution: 14 - -MaskHead: - dilation: 1 - conv_dim: 256 - num_convs: 
4 - resolution: 28 - -BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [0.1, 0.1, 0.2, 0.2] - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - -MaskAssigner: - resolution: 28 - -BBoxHead: - head: TwoFCHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -TwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [240000, 320000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -MaskRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 - -MaskRCNNTestFeed: - batch_size: 1 - dataset: - annotation: annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml b/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml deleted file mode 100644 index be80042520367c1bb2d63ac2998651d789c9e298..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml +++ /dev/null @@ -1,247 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 500000 -snapshot_iter: 10000 -use_gpu: true -log_iter: 20 -log_smooth_window: 20 -save_dir: output -pretrain_weights: 
https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_coco_pretrained.tar -weights: output/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas/model_final -metric: COCO -num_classes: 81 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - -SENet: - depth: 152 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - freeze_norm: True - variant: d - dcn_v2_stages: [3, 4, 5] - std_senet: True - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - freeze_norm: False - norm_type: gn - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 1024 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: CascadeXConvNormHead - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -CascadeXConvNormHead: - norm_type: gn - -CascadeTwoFCHead: - mlp_dim: 1024 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [400000, 460000] - - !LinearWarmup - start_factor: 0.01 - steps: 2000 - -OptimizerBuilder: - 
optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/objects365 - annotation: annotations/train.json - image_dir: train - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !RandomFlipImage - is_mask_flip: true - is_normalized: false - prob: 0.5 - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !ResizeImage - interp: 1 - target_size: - - 416 - - 448 - - 480 - - 512 - - 544 - - 576 - - 608 - - 640 - - 672 - - 704 - - 736 - - 768 - - 800 - - 832 - - 864 - - 896 - - 928 - - 960 - - 992 - - 1024 - - 1056 - - 1088 - - 1120 - - 1152 - - 1184 - - 1216 - - 1248 - - 1280 - - 1312 - - 1344 - - 1376 - - 1408 - max_size: 1600 - use_cv2: true - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - num_workers: 4 - class_aware_sampling: true - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/objects365 - annotation: annotations/val.json - image_dir: val - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !ResizeImage - target_size: 800 - max_size: 1333 - interp: 1 - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/obj365/annotations/val.json - sample_transforms: - - !DecodeImage - to_rgb: False - with_mixup: False - - !NormalizeImage - is_channel_first: false - is_scale: False - mean: - - 102.9801 - - 115.9465 - - 122.7717 - std: - - 1.0 - - 1.0 - - 1.0 - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 
diff --git a/PaddleCV/PaddleDetection/configs/retinanet_r101_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/retinanet_r101_fpn_1x.yml deleted file mode 100644 index 1864f379830292f1cc6a659995a71783688f61c3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/retinanet_r101_fpn_1x.yml +++ /dev/null @@ -1,105 +0,0 @@ -architecture: RetinaNet -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -use_gpu: true -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar -weights: output/retinanet_r101_fpn_1x/model_final -log_smooth_window: 20 -snapshot_iter: 10000 -metric: COCO -save_dir: output -num_classes: 81 - -RetinaNet: - backbone: ResNet - fpn: FPN - retina_head: RetinaHead - -ResNet: - norm_type: affine_channel - norm_decay: 0. - depth: 101 - feature_maps: [3, 4, 5] - freeze_at: 2 - -FPN: - max_level: 7 - min_level: 3 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125] - has_extra_convs: true - -RetinaHead: - num_convs_per_octave: 4 - num_chan: 256 - max_level: 7 - min_level: 3 - prior_prob: 0.01 - base_scale: 4 - num_scales_per_octave: 3 - anchor_generator: - aspect_ratios: [1.0, 2.0, 0.5] - variance: [1.0, 1.0, 1.0, 1.0] - target_assign: - positive_overlap: 0.5 - negative_overlap: 0.4 - gamma: 2.0 - alpha: 0.25 - sigma: 3.0151134457776365 - output_decoder: - score_thresh: 0.05 - nms_thresh: 0.5 - pre_nms_top_n: 1000 - detections_per_im: 100 - nms_eta: 1.0 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: 
train2017 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 2 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/retinanet_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/retinanet_r50_fpn_1x.yml deleted file mode 100644 index 8c24ef4defd9e947406591a17ae2feb4833c61a1..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/retinanet_r50_fpn_1x.yml +++ /dev/null @@ -1,105 +0,0 @@ -architecture: RetinaNet -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 90000 -use_gpu: true -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/retinanet_r50_fpn_1x/model_final -log_smooth_window: 20 -snapshot_iter: 10000 -metric: COCO -save_dir: output -num_classes: 81 - -RetinaNet: - backbone: ResNet - fpn: FPN - retina_head: RetinaHead - -ResNet: - norm_type: affine_channel - norm_decay: 0. 
- depth: 50 - feature_maps: [3, 4, 5] - freeze_at: 2 - -FPN: - max_level: 7 - min_level: 3 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125] - has_extra_convs: true - -RetinaHead: - num_convs_per_octave: 4 - num_chan: 256 - max_level: 7 - min_level: 3 - prior_prob: 0.01 - base_scale: 4 - num_scales_per_octave: 3 - anchor_generator: - aspect_ratios: [1.0, 2.0, 0.5] - variance: [1.0, 1.0, 1.0, 1.0] - target_assign: - positive_overlap: 0.5 - negative_overlap: 0.4 - gamma: 2.0 - alpha: 0.25 - sigma: 3.0151134457776365 - output_decoder: - score_thresh: 0.05 - nms_thresh: 0.5 - pre_nms_top_n: 1000 - detections_per_im: 100 - nms_eta: 1.0 - -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 2 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/retinanet_x101_vd_64x4d_fpn_1x.yml b/PaddleCV/PaddleDetection/configs/retinanet_x101_vd_64x4d_fpn_1x.yml deleted file mode 100644 index 6cc33fafa485c4a64cefef67ec0d6ab0b237db84..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/retinanet_x101_vd_64x4d_fpn_1x.yml +++ /dev/null @@ -1,108 +0,0 @@ -architecture: RetinaNet -train_feed: FasterRCNNTrainFeed 
-eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 180000 -use_gpu: true -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar -weights: output/retinanet_x101_vd_64x4d_fpn_1x/model_final -log_smooth_window: 20 -log_iter: 20 -snapshot_iter: 30000 -metric: COCO -save_dir: output -num_classes: 81 - -RetinaNet: - backbone: ResNeXt - fpn: FPN - retina_head: RetinaHead - -ResNeXt: - depth: 101 - feature_maps: [3, 4, 5] - freeze_at: 2 - group_width: 4 - groups: 64 - norm_type: bn - variant: d - -FPN: - max_level: 7 - min_level: 3 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125] - has_extra_convs: true - -RetinaHead: - num_convs_per_octave: 4 - num_chan: 256 - max_level: 7 - min_level: 3 - prior_prob: 0.01 - base_scale: 4 - num_scales_per_octave: 3 - anchor_generator: - aspect_ratios: [1.0, 2.0, 0.5] - variance: [1.0, 1.0, 1.0, 1.0] - target_assign: - positive_overlap: 0.5 - negative_overlap: 0.4 - gamma: 2.0 - alpha: 0.25 - sigma: 3.0151134457776365 - output_decoder: - score_thresh: 0.05 - nms_thresh: 0.5 - pre_nms_top_n: 1000 - detections_per_im: 100 - nms_eta: 1.0 - -LearningRate: - base_lr: 0.005 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [120000, 160000] - - !LinearWarmup - start_factor: 0.1 - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - 
batch_transforms: - - !PadBatch - pad_to_stride: 128 - num_workers: 2 diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml deleted file mode 100644 index a360830f05f4a1d56bdb915fd95e9a905460c8e5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml +++ /dev/null @@ -1,80 +0,0 @@ -architecture: SSD -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar -use_gpu: true -max_iters: 28000 -snapshot_iter: 2000 -log_smooth_window: 1 -metric: VOC -map_type: 11point -save_dir: output -weights: output/ssd_mobilenet_v1_voc/model_final/ -# 20(label_class) + 1(background) -num_classes: 21 - -SSD: - backbone: MobileNet - multi_box_head: MultiBoxHead - output_decoder: - background_label: 0 - keep_top_k: 200 - nms_eta: 1.0 - nms_threshold: 0.45 - nms_top_k: 400 - score_threshold: 0.01 - -MobileNet: - norm_decay: 0. 
- conv_group_scale: 1 - conv_learning_rate: 0.1 - extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] - with_extra_blocks: true - -MultiBoxHead: - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] - base_size: 300 - flip: true - max_ratio: 90 - max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0] - min_ratio: 20 - min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0] - offset: 0.5 - -LearningRate: - schedulers: - - !PiecewiseDecay - milestones: [10000, 15000, 20000, 25000] - values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001] - -OptimizerBuilder: - optimizer: - momentum: 0.0 - type: RMSPropOptimizer - regularizer: - factor: 0.00005 - type: L2 - -SSDTrainFeed: - batch_size: 32 - use_process: true - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - -SSDEvalFeed: - batch_size: 64 - use_process: true - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - drop_last: false - -SSDTestFeed: - batch_size: 1 - dataset: - use_default_label: true - drop_last: false diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300.yml deleted file mode 100644 index f5e987bcb156a3a4973f9ae9d376d38597471ce2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300.yml +++ /dev/null @@ -1,149 +0,0 @@ -architecture: SSD -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -use_gpu: true -max_iters: 400000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar -save_dir: output -weights: output/ssd_vgg16_300/model_final -num_classes: 81 - -SSD: - backbone: VGG - multi_box_head: MultiBoxHead - output_decoder: - background_label: 0 - keep_top_k: 200 - nms_eta: 1.0 - nms_threshold: 0.45 - nms_top_k: 400 - score_threshold: 0.01 - -VGG: - depth: 16 - 
with_extra_blocks: true - normalizations: [20., -1, -1, -1, -1, -1] - -MultiBoxHead: - base_size: 300 - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] - min_ratio: 15 - max_ratio: 90 - min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0] - max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0] - steps: [8, 16, 32, 64, 100, 300] - offset: 0.5 - flip: true - kernel_size: 3 - pad: 1 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [280000, 360000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - mean: [104, 117, 123] - prob: 0.5 - - !CropImage - avoid_no_bbox: true - batch_sampler: - - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0] - satisfy_all: false - - !ResizeImage - interp: 1 - target_size: 300 - use_cv2: false - - !RandomFlipImage - is_normalized: true - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDEvalFeed: - batch_size: 16 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - drop_last: false - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - 
target_size: 300 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - max_size: 0 - target_size: 300 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml deleted file mode 100644 index 36c0537591f7771eb9c4814cdd8e4c77338e6a9f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml +++ /dev/null @@ -1,151 +0,0 @@ -architecture: SSD -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -use_gpu: true -max_iters: 120001 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: VOC -map_type: 11point -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar -save_dir: output -weights: output/ssd_vgg16_300_voc/model_final/ -# 20(label_class) + 1(background) -num_classes: 21 - -SSD: - backbone: VGG - multi_box_head: MultiBoxHead - output_decoder: - background_label: 0 - keep_top_k: 200 - nms_eta: 1.0 - nms_threshold: 0.45 - nms_top_k: 400 - score_threshold: 0.01 - -VGG: - depth: 16 - with_extra_blocks: true - normalizations: [20., -1, -1, -1, -1, -1] - -MultiBoxHead: - base_size: 300 - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] - min_ratio: 20 - max_ratio: 90 - min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0] - max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0] - steps: [8, 16, 32, 64, 100, 300] - offset: 0.5 - flip: true - min_max_aspect_ratios_order: true - kernel_size: 3 - pad: 1 - -LearningRate: - base_lr: 0.001 - 
schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [80000, 100000] - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - mean: [104, 117, 123] - prob: 0.5 - - !CropImage - avoid_no_bbox: true - batch_sampler: - - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0] - satisfy_all: false - - !ResizeImage - interp: 1 - target_size: 300 - use_cv2: False - - !RandomFlipImage - is_normalized: true - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDEvalFeed: - batch_size: 32 - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - drop_last: false - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ResizeImage - interp: 1 - target_size: 300 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDTestFeed: - batch_size: 1 - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 300, 300] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - max_size: 0 - target_size: 300 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] diff --git 
a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512.yml deleted file mode 100644 index 6214327642bfe3beb09e28cd7dbe891e8d49b848..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512.yml +++ /dev/null @@ -1,151 +0,0 @@ -architecture: SSD -train_feed: SSDTrainFeed -eval_feed: SSDEvalFeed -test_feed: SSDTestFeed -use_gpu: true -max_iters: 400000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar -save_dir: output -weights: output/ssd_vgg16_512/model_final -num_classes: 81 - -SSD: - backbone: VGG - multi_box_head: MultiBoxHead - output_decoder: - background_label: 0 - keep_top_k: 200 - nms_eta: 1.0 - nms_threshold: 0.45 - nms_top_k: 400 - score_threshold: 0.01 - -VGG: - depth: 16 - with_extra_blocks: true - normalizations: [20., -1, -1, -1, -1, -1, -1] - extra_block_filters: [[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 1, 4]] - - -MultiBoxHead: - base_size: 512 - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] - min_ratio: 15 - max_ratio: 90 - min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0] - max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0] - steps: [8, 16, 32, 64, 128, 256, 512] - offset: 0.5 - flip: true - kernel_size: 3 - pad: 1 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [280000, 360000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: 
false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - mean: [104, 117, 123] - prob: 0.5 - - !CropImage - avoid_no_bbox: true - batch_sampler: - - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0] - satisfy_all: false - - !ResizeImage - interp: 1 - target_size: 512 - use_cv2: false - - !RandomFlipImage - is_normalized: true - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDEvalFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - drop_last: false - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - target_size: 512 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] - -SSDTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - max_size: 0 - target_size: 512 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [104, 117, 123] - std: [1, 1, 1] diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml deleted file mode 100644 index b2028e0b11e6d05d061788315de2630ac0ede52e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml +++ /dev/null @@ -1,155 +0,0 @@ -architecture: SSD -train_feed: SSDTrainFeed -eval_feed: 
SSDEvalFeed -test_feed: SSDTestFeed -use_gpu: true -max_iters: 120000 -snapshot_iter: 10000 -log_smooth_window: 20 -log_iter: 20 -metric: VOC -map_type: 11point -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar -save_dir: output -weights: output/ssd_vgg16_512_voc/model_final/ -# 20(label_class) + 1(background) -num_classes: 21 - -SSD: - backbone: VGG - multi_box_head: MultiBoxHead - output_decoder: - background_label: 0 - keep_top_k: 200 - nms_eta: 1.0 - nms_threshold: 0.45 - nms_top_k: 400 - score_threshold: 0.01 - -VGG: - depth: 16 - with_extra_blocks: true - normalizations: [20., -1, -1, -1, -1, -1, -1] - extra_block_filters: [[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 1, 1, 4]] - - -MultiBoxHead: - base_size: 512 - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] - min_ratio: 20 - max_ratio: 90 - min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0] - max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0] - steps: [8, 16, 32, 64, 128, 256, 512] - offset: 0.5 - flip: true - kernel_size: 3 - pad: 1 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [80000, 100000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -SSDTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !RandomDistort - brightness_lower: 0.875 - brightness_upper: 1.125 - is_order: true - - !ExpandImage - max_ratio: 4 - mean: [123, 117, 104] - prob: 0.5 - - !CropImage - avoid_no_bbox: true - batch_sampler: - - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0] - - [1, 50, 0.3, 1.0, 
0.5, 2.0, 0.3, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0] - satisfy_all: false - - !ResizeImage - interp: 1 - target_size: 512 - use_cv2: false - - !RandomFlipImage - is_normalized: true - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [123, 117, 104] - std: [1, 1, 1] - -SSDEvalFeed: - batch_size: 32 - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - drop_last: false - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ResizeImage - interp: 1 - target_size: 512 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [123, 117, 104] - std: [1, 1, 1] - -SSDTestFeed: - batch_size: 1 - dataset: - use_default_label: true - drop_last: false - image_shape: [3, 512, 512] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 1 - max_size: 0 - target_size: 512 - use_cv2: false - - !Permute - to_bgr: false - - !NormalizeImage - is_scale: false - mean: [123, 117, 104] - std: [1, 1, 1] diff --git a/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml b/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml deleted file mode 100644 index 9a1c243b8b245c4d0cb60e6ea9f57778e896d263..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml +++ /dev/null @@ -1,82 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 500200 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar -weights: output/yolov3_darknet/model_final -num_classes: 80 - -YOLOv3: - backbone: DarkNet - yolo_head: YOLOv3Head - -DarkNet: - 
norm_type: sync_bn - norm_decay: 0. - depth: 53 - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: true - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - - !LinearWarmup - start_factor: 0. - steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 8 - bufsize: 128 - use_process: true - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml b/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml deleted file mode 100644 index 876d380ffcd4f59bcdeebd8fa0e51e98a1e58f93..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml +++ /dev/null @@ -1,84 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 70000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: VOC -map_type: 11point -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar -weights: output/yolov3_darknet_voc/model_final -num_classes: 20 - -YOLOv3: - backbone: DarkNet - yolo_head: YOLOv3Head - -DarkNet: - norm_type: 
sync_bn - norm_decay: 0. - depth: 53 - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: false - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 55000 - - 62000 - - !LinearWarmup - start_factor: 0. - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - num_workers: 8 - bufsize: 128 - use_process: true - mixup_epoch: 250 - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - use_default_label: true diff --git a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1.yml b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1.yml deleted file mode 100644 index 3e622025b587b56c6e79dd3a1cf1cbba00901406..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1.yml +++ /dev/null @@ -1,83 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 500200 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: COCO -pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar -weights: output/yolov3_mobilenet_v1/model_final -num_classes: 80 - -YOLOv3: - backbone: MobileNet - yolo_head: YOLOv3Head - -MobileNet: - norm_type: sync_bn - norm_decay: 0. 
- conv_group_scale: 1 - with_extra_blocks: false - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: true - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - - !LinearWarmup - start_factor: 0. - steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 8 - bufsize: 128 - use_process: true - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml deleted file mode 100644 index c89aefc4199123c3ec764e798a15bbb4775a3298..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml +++ /dev/null @@ -1,122 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 20000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 200 -metric: VOC -map_type: 11point -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar -weights: output/yolov3_mobilenet_v1_fruit/best_model -num_classes: 3 -finetune_exclude_pretrained_params: ['yolo_output'] 
- -YOLOv3: - backbone: MobileNet - yolo_head: YOLOv3Head - -MobileNet: - norm_type: sync_bn - norm_decay: 0. - conv_group_scale: 1 - with_extra_blocks: false - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: true - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.00001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 15000 - - 18000 - - !LinearWarmup - start_factor: 0. - steps: 100 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/fruit - annotation: fruit-detection/train.txt - use_default_label: false - num_workers: 16 - bufsize: 128 - use_process: true - mixup_epoch: -1 - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !NormalizeBox {} - - !ExpandImage - max_ratio: 4.0 - mean: [123.675, 116.28, 103.53] - prob: 0.5 - - !RandomInterpImage - max_size: 0 - target_size: 608 - - !RandomFlipImage - is_mask_flip: false - is_normalized: true - prob: 0.5 - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: - - 0.485 - - 0.456 - - 0.406 - std: - - 0.229 - - 0.224 - - 0.225 - - !Permute - channel_first: true - to_bgr: false - batch_transforms: - - !RandomShape - sizes: [608] - with_background: false - -YoloEvalFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/fruit - annotation: fruit-detection/val.txt - use_default_label: false - - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/fruit - use_default_label: false diff --git a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml 
b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml deleted file mode 100644 index 63e4cbea93f8a89b4f7890d4ecf66213eb578f66..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml +++ /dev/null @@ -1,85 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 70000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: VOC -map_type: 11point -pretrain_weights: http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar -weights: output/yolov3_mobilenet_v1_voc/model_final -num_classes: 20 - -YOLOv3: - backbone: MobileNet - yolo_head: YOLOv3Head - -MobileNet: - norm_type: sync_bn - norm_decay: 0. - conv_group_scale: 1 - with_extra_blocks: false - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: false - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 55000 - - 62000 - - !LinearWarmup - start_factor: 0. 
- steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - num_workers: 8 - bufsize: 128 - use_process: true - mixup_epoch: 250 - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - use_default_label: true diff --git a/PaddleCV/PaddleDetection/configs/yolov3_r34.yml b/PaddleCV/PaddleDetection/configs/yolov3_r34.yml deleted file mode 100644 index e864f8fd92d2f671c442f3beecd8344171952e48..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_r34.yml +++ /dev/null @@ -1,85 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 500200 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar -weights: output/yolov3_r34/model_final -num_classes: 80 - -YOLOv3: - backbone: ResNet - yolo_head: YOLOv3Head - -ResNet: - norm_type: sync_bn - freeze_at: 0 - freeze_norm: false - norm_decay: 0. - depth: 34 - feature_maps: [3, 4, 5] - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: true - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - - !LinearWarmup - start_factor: 0. 
- steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 8 - bufsize: 128 - use_process: true - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - annotation: dataset/coco/annotations/instances_val2017.json diff --git a/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml b/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml deleted file mode 100644 index aa152e8fe88fb5eb3a86ac5b1623ce940ad81b1e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml +++ /dev/null @@ -1,87 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 70000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: VOC -map_type: 11point -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar -weights: output/yolov3_r34_voc/model_final -num_classes: 20 - -YOLOv3: - backbone: ResNet - yolo_head: YOLOv3Head - -ResNet: - norm_type: sync_bn - freeze_at: 0 - freeze_norm: false - norm_decay: 0. - depth: 34 - feature_maps: [3, 4, 5] - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. 
- ignore_thresh: 0.7 - label_smooth: false - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 55000 - - 62000 - - !LinearWarmup - start_factor: 0. - steps: 1000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/voc - annotation: trainval.txt - use_default_label: true - num_workers: 8 - bufsize: 128 - use_process: true - mixup_epoch: 250 - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/voc - annotation: test.txt - use_default_label: true - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - use_default_label: true diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png deleted file mode 100644 index 63ae9167fd03e8a95756fe5f6195fc8d741b9cfa..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/001.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png deleted file mode 100644 index 0de905cf55e6b02487ee1b8220810df8eaa24c2c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/002.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png deleted file mode 100644 index e9026e099df42d4267be07a71401eb5426b47745..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/003.png and /dev/null differ diff --git 
a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png deleted file mode 100644 index d8118ec3e0ef63bc74e825b5e7638a1886580604..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/004.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png deleted file mode 100644 index 5194d6ff891b9507fedfc53f36de4f00219c7f30..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/001.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png deleted file mode 100644 index 7c62be5051f9a47c5f5e98ccd9f45c3fa5f30257..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/demo/output/004.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian.json b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian.json deleted file mode 100644 index f72fe6dc65209ab3506d18556fb8b83b6ec832a9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "images": [], - "annotations": [], - "categories": [ - { - "supercategory": "component", - "id": 1, - "name": "pedestrian" - } - ] -} diff --git a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml b/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml deleted file mode 100644 index adc9109aa356e109afc81bea13b856ce0f4be448..0000000000000000000000000000000000000000 --- 
a/PaddleCV/PaddleDetection/contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml +++ /dev/null @@ -1,82 +0,0 @@ -architecture: YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 200000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 5000 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar -weights: https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar -num_classes: 1 - -YOLOv3: - backbone: DarkNet - yolo_head: YOLOv3Head - -DarkNet: - norm_type: sync_bn - norm_decay: 0. - depth: 53 - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: true - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 150000 - - 180000 - - !LinearWarmup - start_factor: 0. 
- steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/pedestrian - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 8 - bufsize: 128 - use_process: true - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/pedestrian - annotation: annotations/instances_val2017.json - image_dir: val2017 - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - annotation: contrib/PedestrianDetection/pedestrian.json diff --git a/PaddleCV/PaddleDetection/contrib/README.md b/PaddleCV/PaddleDetection/contrib/README.md deleted file mode 100644 index fbb55512acec372fa31d2955afe65cbc8abf7b2e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/contrib/README.md +++ /dev/null @@ -1,104 +0,0 @@ -# PaddleDetection applied for specific scenarios - -We provide some models implemented by PaddlePaddle to detect objects in specific scenarios, users can download the models and use them in these scenarios. - -| Task | Algorithm | Box AP | Download | -|:---------------------|:---------:|:------:| :-------------------------------------------------------------------------------------: | -| Vehicle Detection | YOLOv3 | 54.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar) | -| Pedestrian Detection | YOLOv3 | 51.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar) | - -## Vehicle Detection - -One of major applications of vehichle detection is traffic monitoring. In this scenary, vehicles to be detected are mostly captured by the cameras mounted on top of traffic light columns. - -### 1. Network - -The network for detecting vehicles is YOLOv3, the backbone of which is Dacknet53. - -### 2. 
Configuration for training - -PaddleDetection provides users with a configuration file [yolov3_darnet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) to train YOLOv3 on the COCO dataset, compared with this file, we modify some parameters as followed to conduct the training for vehicle detection: - -* max_iters: 120000 -* num_classes: 6 -* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]] -* label_smooth: false -* nms/nms_top_k: 400 -* nms/score_threshold: 0.005 -* milestones: [60000, 80000] -* dataset_dir: dataset/vehicle - -### 3. Accuracy - -The accuracy of the model trained and evaluated on our private data is shown as followed: - -AP at IoU=.50:.05:.95 is 0.545. - -AP at IoU=.50 is 0.764. - -### 4. Inference - -Users can employ the model to conduct the inference: - -``` -export CUDA_VISIBLE_DEVICES=0 -export PYTHONPATH=$PYTHONPATH:. -python -u tools/infer.py -c contrib/VehicleDetection/vehicle_yolov3_darknet.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar \ - --infer_dir contrib/VehicleDetection/demo \ - --draw_threshold 0.2 \ - --output_dir contrib/VehicleDetection/demo/output - -``` - -Some inference results are visualized below: - -![](VehicleDetection/demo/output/001.jpeg) - -![](VehicleDetection/demo/output/005.png) - -## Pedestrian Detection - -The main applications of pedetestrian detection include intelligent monitoring. In this scenary, photos of pedetestrians are taken by surveillance cameras in public areas, then pedestrian detection are conducted on these photos. - -### 1. Network - -The network for detecting vehicles is YOLOv3, the backbone of which is Dacknet53. - -### 2. 
Configuration for training - -PaddleDetection provides users with a configuration file [yolov3_darnet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml) to train YOLOv3 on the COCO dataset, compared with this file, we modify some parameters as followed to conduct the training for pedestrian detection: - -* max_iters: 200000 -* num_classes: 1 -* snapshot_iter: 5000 -* milestones: [150000, 180000] -* dataset_dir: dataset/pedestrian - -### 3. Accuracy - -The accuracy of the model trained and evaluted on our private data is shown as followed: - -AP at IoU=.50:.05:.95 is 0.518. - -AP at IoU=.50 is 0.792. - -### 4. Inference - -Users can employ the model to conduct the inference: - -``` -export CUDA_VISIBLE_DEVICES=0 -export PYTHONPATH=$PYTHONPATH:. -python -u tools/infer.py -c contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar \ - --infer_dir contrib/PedestrianDetection/demo \ - --draw_threshold 0.3 \ - --output_dir contrib/PedestrianDetection/demo/output -``` - -Some inference results are visualized below: - -![](PedestrianDetection/demo/output/001.png) - -![](PedestrianDetection/demo/output/004.png) diff --git a/PaddleCV/PaddleDetection/contrib/README_cn.md b/PaddleCV/PaddleDetection/contrib/README_cn.md deleted file mode 100644 index 92fc25ece0ca62c0255582208706e8a773f80e5a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/contrib/README_cn.md +++ /dev/null @@ -1,106 +0,0 @@ -# PaddleDetection 特色垂类检测模型 - -我们提供了针对不同场景的基于PaddlePaddle的检测模型,用户可以下载模型进行使用。 - -| 任务 | 算法 | 精度(Box AP) | 下载 | -|:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: | -| 车辆检测 | YOLOv3 | 54.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar) | -| 行人检测 | YOLOv3 | 51.8 | 
[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar) | - - -## 车辆检测(Vehicle Detection) - -车辆检测的主要应用之一是交通监控。在这样的监控场景中,待检测的车辆多为道路红绿灯柱上的摄像头拍摄所得。 - -### 1. 模型结构 - -Backbone为Dacknet53的YOLOv3。 - -### 2. 训练参数配置 - -PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darnet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml),与之相比,在进行车辆检测的模型训练时,我们对以下参数进行了修改: - -* max_iters: 120000 -* num_classes: 6 -* anchors: [[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], [253, 224]] -* label_smooth: false -* nms/nms_top_k: 400 -* nms/score_threshold: 0.005 -* milestones: [60000, 80000] -* dataset_dir: dataset/vehicle - -### 3. 精度指标 - -模型在我们内部数据上的精度指标为: - -IOU=.50:.05:.95时的AP为 0.545。 - -IOU=.5时的AP为 0.764。 - -### 4. 预测 - -用户可以使用我们训练好的模型进行车辆检测: - -``` -export CUDA_VISIBLE_DEVICES=0 -export PYTHONPATH=$PYTHONPATH:. -python -u tools/infer.py -c contrib/VehicleDetection/vehicle_yolov3_darknet.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar \ - --infer_dir contrib/VehicleDetection/demo \ - --draw_threshold 0.2 \ - --output_dir contrib/VehicleDetection/demo/output - -``` - -预测结果示例: - -![](VehicleDetection/demo/output/001.jpeg) - -![](VehicleDetection/demo/output/005.png) - -## 行人检测(Pedestrian Detection) - -行人检测的主要应用有智能监控。在监控场景中,大多是从公共区域的监控摄像头视角拍摄行人,获取图像后再进行行人检测。 - -### 1. 模型结构 - -Backbone为Dacknet53的YOLOv3。 - - -### 2. 训练参数配置 - -PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darnet.yml](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/configs/yolov3_darknet.yml),与之相比,在进行行人检测的模型训练时,我们对以下参数进行了修改: - -* max_iters: 200000 -* num_classes: 1 -* snapshot_iter: 5000 -* milestones: [150000, 180000] -* dataset_dir: dataset/pedestrian - -### 2. 精度指标 - -模型在我们针对监控场景的内部数据上精度指标为: - -IOU=.5时的AP为 0.792。 - -IOU=.5-.95时的AP为 0.518。 - -### 3. 
预测 - -用户可以使用我们训练好的模型进行行人检测: - -``` -export CUDA_VISIBLE_DEVICES=0 -export PYTHONPATH=$PYTHONPATH:. -python -u tools/infer.py -c contrib/PedestrianDetection/pedestrian_yolov3_darknet.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/pedestrian_yolov3_darknet.tar \ - --infer_dir contrib/PedestrianDetection/demo \ - --draw_threshold 0.3 \ - --output_dir contrib/PedestrianDetection/demo/output -``` - -预测结果示例: - -![](PedestrianDetection/demo/output/001.png) - -![](PedestrianDetection/demo/output/004.png) diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg deleted file mode 100644 index 8786db5eb6773931c363358bb39462b33db55369..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/001.jpeg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png deleted file mode 100644 index c01ab4ce769fb3b1c8863093a35d27da0ab10efd..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/003.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png deleted file mode 100644 index 8907eb8d4d9b82e08ca214509c9fb41ca889db2a..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/004.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png deleted file mode 100644 index bf17712809c2fe6fa8e7d4f093ec4ac94523537c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/005.png and /dev/null differ diff --git 
a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg deleted file mode 100644 index aa2b679d4d2a73487edd5f9c67323ab18df93893..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/001.jpeg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png b/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png deleted file mode 100644 index 57f918a30fcc5bf7bda284c1a1a0304e8822d325..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/contrib/VehicleDetection/demo/output/005.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle.json b/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle.json deleted file mode 100644 index 5863a9a8c9e0d8b4daeff31e7fe7869e084d3fb4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "images": [], - "annotations": [], - "categories": [ - { - "supercategory": "component", - "id": 1, - "name": "car" - }, - { - "supercategory": "component", - "id": 2, - "name": "truck" - }, - { - "supercategory": "component", - "id": 3, - "name": "bus" - }, - { - "supercategory": "component", - "id": 4, - "name": "motorbike" - }, - { - "supercategory": "component", - "id": 5, - "name": "tricycle" - }, - { - "supercategory": "component", - "id": 6, - "name": "carplate" - } - ] -} diff --git a/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml b/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml deleted file mode 100644 index 6a923a0c109a30ed2e247fc7204e81ef0a82eef4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/contrib/VehicleDetection/vehicle_yolov3_darknet.yml +++ /dev/null @@ -1,82 +0,0 @@ -architecture: 
YOLOv3 -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed -use_gpu: true -max_iters: 120000 -log_smooth_window: 20 -save_dir: output -snapshot_iter: 2000 -metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar -weights: https://paddlemodels.bj.bcebos.com/object_detection/vehicle_yolov3_darknet.tar -num_classes: 6 - -YOLOv3: - backbone: DarkNet - yolo_head: YOLOv3Head - -DarkNet: - norm_type: sync_bn - norm_decay: 0. - depth: 53 - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[8, 9], [10, 23], [19, 15], - [23, 33], [40, 25], [54, 50], - [101, 80], [139, 145], [253, 224]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: false - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 400 - normalized: false - score_threshold: 0.005 - -LearningRate: - base_lr: 0.001 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 60000 - - 80000 - - !LinearWarmup - start_factor: 0. 
- steps: 4000 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0005 - type: L2 - -YoloTrainFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/vehicle - annotation: annotations/instances_train2017.json - image_dir: train2017 - num_workers: 8 - bufsize: 128 - use_process: true - -YoloEvalFeed: - batch_size: 8 - image_shape: [3, 608, 608] - dataset: - dataset_dir: dataset/vehicle - annotation: annotations/instances_val2017.json - image_dir: val2017 - -YoloTestFeed: - batch_size: 1 - image_shape: [3, 608, 608] - dataset: - annotation: contrib/VehicleDetection/vehicle.json diff --git a/PaddleCV/PaddleDetection/dataset/coco/download_coco.py b/PaddleCV/PaddleDetection/dataset/coco/download_coco.py deleted file mode 100644 index 2b4f7e764e17296ccd8905478bf3ccb3818b909f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/coco/download_coco.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import sys -import os.path as osp -import logging - -from ppdet.utils.download import download_dataset - -logging.basicConfig(level=logging.INFO) - -download_path = osp.split(osp.realpath(sys.argv[0]))[0] -download_dataset(download_path, 'coco') diff --git a/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py b/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py deleted file mode 100644 index 5cce18895af3eeb81c4e49f4897cc591b2f40f9b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import sys -import os.path as osp -import logging - -from ppdet.utils.download import download_dataset - -logging.basicConfig(level=logging.INFO) - -download_path = osp.split(osp.realpath(sys.argv[0]))[0] -download_dataset(download_path, 'fruit') diff --git a/PaddleCV/PaddleDetection/dataset/voc/create_list.py b/PaddleCV/PaddleDetection/dataset/voc/create_list.py deleted file mode 100644 index 1a237493bd50d7be3ee2ab5275159fc5783e5f59..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/voc/create_list.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import sys -import os.path as osp -import logging - -from ppdet.utils.download import create_voc_list - -logging.basicConfig(level=logging.INFO) - -voc_path = osp.split(osp.realpath(sys.argv[0]))[0] -create_voc_list(voc_path) diff --git a/PaddleCV/PaddleDetection/dataset/voc/download_voc.py b/PaddleCV/PaddleDetection/dataset/voc/download_voc.py deleted file mode 100644 index e7f32657f1697bd82f0f7dfbb52a3d1cb987c4bd..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/voc/download_voc.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import sys -import os.path as osp -import logging - -from ppdet.utils.download import download_dataset - -logging.basicConfig(level=logging.INFO) - -download_path = osp.split(osp.realpath(sys.argv[0]))[0] -download_dataset(download_path, 'voc') diff --git a/PaddleCV/PaddleDetection/dataset/voc/label_list.txt b/PaddleCV/PaddleDetection/dataset/voc/label_list.txt deleted file mode 100644 index 8420ab35ede7400974f25836a6bb543024686a0e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/voc/label_list.txt +++ /dev/null @@ -1,20 +0,0 @@ -aeroplane -bicycle -bird -boat -bottle -bus -car -cat -chair -cow -diningtable -dog -horse -motorbike -person -pottedplant -sheep -sofa -train -tvmonitor diff --git a/PaddleCV/PaddleDetection/dataset/wider_face/download.sh b/PaddleCV/PaddleDetection/dataset/wider_face/download.sh deleted file mode 100755 index 6c86a22c6826d88846a16fbd43f8b556d8610b8f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/wider_face/download.sh +++ /dev/null @@ -1,21 +0,0 @@ -# All rights `PaddleDetection` reserved -# References: -# @inproceedings{yang2016wider, -# Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou}, -# Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, -# Title = {WIDER FACE: A Face Detection Benchmark}, -# Year = {2016}} - -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip -wget https://dataset.bj.bcebos.com/wider_face/WIDER_val.zip -wget https://dataset.bj.bcebos.com/wider_face/wider_face_split.zip -# Extract the data. -echo "Extracting..." 
-unzip WIDER_train.zip -unzip WIDER_val.zip -unzip wider_face_split.zip diff --git a/PaddleCV/PaddleDetection/demo/000000014439.jpg b/PaddleCV/PaddleDetection/demo/000000014439.jpg deleted file mode 100644 index 0abbdab06eb5950b93908cc91adfa640e8a3ac78..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/000000014439.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg b/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg deleted file mode 100644 index 58e9d3e228af43c9b55d8d0cb385ce82ebb8b996..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/000000014439_640x640.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/000000087038.jpg b/PaddleCV/PaddleDetection/demo/000000087038.jpg deleted file mode 100644 index 9f77f5d5f057b6f92dc096da704ecb8dee99bdf5..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/000000087038.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/000000570688.jpg b/PaddleCV/PaddleDetection/demo/000000570688.jpg deleted file mode 100644 index cb304bd56c4010c08611a30dcca58ea9140cea54..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/000000570688.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/cas.png b/PaddleCV/PaddleDetection/demo/cas.png deleted file mode 100644 index a60303c99ce0d1ba52e6f89414df8bd5c90fae62..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/cas.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/mask_rcnn_demo.ipynb b/PaddleCV/PaddleDetection/demo/mask_rcnn_demo.ipynb deleted file mode 100644 index 860b185043679e3c7bb28c4fdad505c9f16dda56..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/demo/mask_rcnn_demo.ipynb +++ /dev/null @@ -1,413 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "ein.tags": 
"worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Change working directory to the project root" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/yang/models/PaddleCV/PaddleDetection\n" - ] - } - ], - "source": [ - "%cd .." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Now let's take a look at the input image." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from PIL import Image\n", - "\n", - "image_path = 'demo/000000570688.jpg'\n", - "img = Image.open(image_path)\n", - "img" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "For inference, preprocess only involves decoding, normalization and transposing to CHW.\n", - "\n", - "**NOTE:** in most cases, one should use the configuration based [data feed](../docs/DATA.md) API which greatly simplifies the data pipeline." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "from ppdet.data.transform.operators import DecodeImage, NormalizeImage, Permute\n", - "\n", - "sample = {'im_file': image_path}\n", - "decode = DecodeImage(to_rgb=True)\n", - "normalize = NormalizeImage(\n", - " mean=[0.485, 0.456, 0.406],\n", - " std=[0.229, 0.224, 0.225],\n", - " is_scale=True,\n", - " is_channel_first=False)\n", - "permute = Permute(to_bgr=False, channel_first=True)\n", - "\n", - "sample = permute(normalize(decode(sample)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Some extra effort is needed to massage data into the desired format. \n", - "\n", - "**NOTE:** Again, if the data feed API is used, these are handled automatically." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "h = sample['h']\n", - "w = sample['w']\n", - "im_info = np.array((h, w, 1), dtype=np.float32)\n", - "\n", - "sample['im_info'] = im_info\n", - "sample['im_shape'] = im_info\n", - "\n", - "# we don't need these\n", - "for key in ['im_file', 'h', 'w']:\n", - " del sample[key]\n", - "\n", - "# batch of a single sample\n", - "sample = {k: v[np.newaxis, ...] 
for k, v in sample.items()}\n", - "\n", - "feed_var_def = [\n", - " {'name': 'image', 'shape': (h, w, 3)},\n", - " {'name': 'im_info', 'shape': [3]},\n", - " {'name': 'im_shape', 'shape': [3]},\n", - "]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Next, build the [Mask R-CNN](https://arxiv.org/abs/1703.06870) model and associated fluid programs" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "from paddle import fluid\n", - "from ppdet.modeling import (MaskRCNN, ResNet, ResNetC5, RPNHead, RoIAlign,\n", - " BBoxHead, MaskHead, BBoxAssigner, MaskAssigner)\n", - "\n", - "roi_size = 14\n", - "\n", - "model = MaskRCNN(\n", - " ResNet(feature_maps=4),\n", - " RPNHead(),\n", - " BBoxHead(ResNetC5()),\n", - " BBoxAssigner(),\n", - " RoIAlign(resolution=roi_size),\n", - " MaskAssigner(),\n", - " MaskHead())\n", - "\n", - "startup_prog = fluid.Program()\n", - "infer_prog = fluid.Program()\n", - "with fluid.program_guard(infer_prog, startup_prog):\n", - " with fluid.unique_name.guard():\n", - " feed_vars = {\n", - " var['name']: fluid.layers.data(\n", - " name=var['name'],\n", - " shape=var['shape'],\n", - " dtype='float32',\n", - " lod_level=0) for var in feed_var_def\n", - " }\n", - " test_fetches = model.test(feed_vars)\n", - "infer_prog = infer_prog.clone(for_test=True)\n", - "\n", - "# use GPU if available\n", - "if fluid.core.get_cuda_device_count() > 0:\n", - " place = fluid.CUDAPlace(0)\n", - "else:\n", - " place = fluid.CPUPlace()\n", - "\n", - "exe = fluid.Executor(place)\n", - "_ = exe.run(startup_prog)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Load the checkpoint weights, just 
wait a couple of minutes for it to be downloaded." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 140690/140690 [00:12<00:00, 10843.70KB/s]\n" - ] - } - ], - "source": [ - "from ppdet.utils import checkpoint\n", - "\n", - "ckpt_url = 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar'\n", - "checkpoint.load_checkpoint(exe, infer_prog, ckpt_url)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Run the program and fetch the result. " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "output = exe.run(infer_prog, feed=sample,\n", - " fetch_list=[t.name for t in test_fetches.values()],\n", - " return_numpy=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Again, we need to massage the result a bit for visualization." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [], - "source": [ - "res = {\n", - " k: (np.array(v), v.recursive_sequence_lengths())\n", - " for k, v in zip(test_fetches.keys(), output)\n", - "}\n", - "# fake image id\n", - "res['im_id'] = [[[0] for _ in range(res['bbox'][1][0][0])]]\n", - "res['im_shape'] = [[im_info]]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "source": [ - "Now overlay the bboxes and masks onto the image...\n", - "\n", - "And voila, we've successfully built and run the Mask R-CNN inference pipeline." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "autoscroll": false, - "ein.hycell": false, - "ein.tags": "worksheet-0", - "slideshow": { - "slide_type": "-" - } - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ppdet.utils.coco_eval import bbox2out, mask2out, coco17_category_info\n", - "from ppdet.utils.visualizer import visualize_results\n", - "\n", - "cls2cat, cat2name = coco17_category_info()\n", - "bboxes = bbox2out([res], cls2cat)\n", - "masks = mask2out([res], cls2cat, roi_size)\n", - "\n", - "visualize_results(img, 0, cat2name, 0.5, bboxes, masks)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" - }, - "name": "mask_rcnn_demo.ipynb" - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git 
a/PaddleCV/PaddleDetection/demo/obj365_gt.png b/PaddleCV/PaddleDetection/demo/obj365_gt.png deleted file mode 100644 index eb69077f6ba1676d9fc0ba3e4cada645c5ab7245..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/obj365_gt.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/obj365_pred.png b/PaddleCV/PaddleDetection/demo/obj365_pred.png deleted file mode 100644 index d99fb1fd198f1f8b6e003741375e9aa5524f117a..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/obj365_pred.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/orange_71.jpg b/PaddleCV/PaddleDetection/demo/orange_71.jpg deleted file mode 100644 index da7974a1a1371298f1ca5f4ef9c82bd3824d7ac3..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/orange_71.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/orange_71_detection.jpg b/PaddleCV/PaddleDetection/demo/orange_71_detection.jpg deleted file mode 100644 index 88cbf9c97120f79d5ef5f80ccb789a6e3c29bedf..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/orange_71_detection.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/output/000000570688.jpg b/PaddleCV/PaddleDetection/demo/output/000000570688.jpg deleted file mode 100644 index 8a0f84f38331093a0e1afc52d4b7747535bdbb6d..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/output/000000570688.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg b/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg deleted file mode 100644 index 2a563361ae03fbe079dba017374eee51ccbd17dd..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/output/12_Group_Group_12_Group_Group_12_935.jpg and /dev/null differ diff --git 
a/PaddleCV/PaddleDetection/demo/tensorboard_fruit.jpg b/PaddleCV/PaddleDetection/demo/tensorboard_fruit.jpg deleted file mode 100644 index 44a955fafffb4ab03d911818e20e6f72499f2f4f..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/demo/tensorboard_fruit.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md b/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md deleted file mode 100644 index bc4c25fee1555eb124c25edfcd066af0f6c7bfcf..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/BENCHMARK_INFER_cn.md +++ /dev/null @@ -1,89 +0,0 @@ - - -# 推理Benchmark - - - -- 测试环境: - - CUDA 9.0 - - CUDNN 7.5 - - TensorRT-5.1.2.2 - - PaddlePaddle v1.6 - - GPU分别为: Tesla V100和Tesla P4 -- 测试方式: - - 为了方便比较不同模型的推理速度,输入采用同样大小的图片,为 3x640x640,采用 `demo/000000014439_640x640.jpg` 图片。 - - Batch Size=1 - - 去掉前10轮warmup时间,测试100轮的平均时间,单位ms/image,包括输入数据拷贝至GPU的时间、计算时间、数据拷贝至CPU的时间。 - - 采用Fluid C++预测引擎: 包含Fluid C++预测、Fluid-TensorRT预测,下面同时测试了Float32 (FP32) 和Float16 (FP16)的推理速度。 - - 测试时开启了 FLAGS_cudnn_exhaustive_search=True,使用exhaustive方式搜索卷积计算算法。 - -### 推理速度 - - - - - -| 模型 | Tesla V100 Fluid (ms/image) | Tesla V100 Fluid-TensorRT-FP32 (ms/image) | Tesla V100 Fluid-TensorRT-FP16 (ms/image) | Tesla P4 Fluid (ms/image) | Tesla P4 Fluid-TensorRT-FP32 (ms/image) | -| ------------------------------------- | ----------------------------- | ------------------------------------------- | ------------------------------------------- | --------------------------- | ----------------------------------------- | -| faster_rcnn_r50_1x | 147.488 | 146.124 | 142.416 | 471.547 | 471.631 | -| faster_rcnn_r50_2x | 147.636 | 147.73 | 141.664 | 471.548 | 472.86 | -| faster_rcnn_r50_vd_1x | 146.588 | 144.767 | 141.208 | 459.357 | 457.852 | -| faster_rcnn_r50_fpn_1x | 25.11 | 24.758 | 20.744 | 59.411 | 57.585 | -| faster_rcnn_r50_fpn_2x | 25.351 | 24.505 | 20.509 | 59.594 | 57.591 | -| faster_rcnn_r50_vd_fpn_2x | 25.514 | 25.292 | 
21.097 | 61.026 | 58.377 | -| faster_rcnn_r50_fpn_gn_2x | 36.959 | 36.173 | 32.356 | 101.339 | 101.212 | -| faster_rcnn_dcn_r50_fpn_1x | 28.707 | 28.162 | 27.503 | 68.154 | 67.443 | -| faster_rcnn_dcn_r50_vd_fpn_2x | 28.576 | 28.271 | 27.512 | 68.959 | 68.448 | -| faster_rcnn_r101_1x | 153.267 | 150.985 | 144.849 | 490.104 | 486.836 | -| faster_rcnn_r101_fpn_1x | 30.949 | 30.331 | 24.021 | 73.591 | 69.736 | -| faster_rcnn_r101_fpn_2x | 30.918 | 29.126 | 23.677 | 73.563 | 70.32 | -| faster_rcnn_r101_vd_fpn_1x | 31.144 | 30.202 | 23.57 | 74.767 | 70.773 | -| faster_rcnn_r101_vd_fpn_2x | 30.678 | 29.969 | 23.327 | 74.882 | 70.842 | -| faster_rcnn_x101_vd_64x4d_fpn_1x | 60.36 | 58.461 | 45.172 | 132.178 | 131.734 | -| faster_rcnn_x101_vd_64x4d_fpn_2x | 59.003 | 59.163 | 46.065 | 131.422 | 132.186 | -| faster_rcnn_dcn_r101_vd_fpn_1x | 36.862 | 37.205 | 36.539 | 93.273 | 92.616 | -| faster_rcnn_dcn_x101_vd_64x4d_fpn_1x | 78.476 | 78.335 | 77.559 | 185.976 | 185.996 | -| faster_rcnn_se154_vd_fpn_s1x | 166.282 | 90.508 | 80.738 | 304.653 | 193.234 | -| mask_rcnn_r50_1x | 160.185 | 160.4 | 160.322 | - | - | -| mask_rcnn_r50_2x | 159.821 | 159.527 | 160.41 | - | - | -| mask_rcnn_r50_fpn_1x | 95.72 | 95.719 | 92.455 | 259.8 | 258.04 | -| mask_rcnn_r50_fpn_2x | 84.545 | 83.567 | 79.269 | 227.284 | 222.975 | -| mask_rcnn_r50_vd_fpn_2x | 82.07 | 82.442 | 77.187 | 223.75 | 221.683 | -| mask_rcnn_r50_fpn_gn_2x | 94.936 | 94.611 | 91.42 | 265.468 | 263.76 | -| mask_rcnn_dcn_r50_fpn_1x | 97.828 | 97.433 | 93.76 | 256.295 | 258.056 | -| mask_rcnn_dcn_r50_vd_fpn_2x | 77.831 | 79.453 | 76.983 | 205.469 | 204.499 | -| mask_rcnn_r101_fpn_1x | 95.543 | 97.929 | 90.314 | 252.997 | 250.782 | -| mask_rcnn_r101_vd_fpn_1x | 98.046 | 97.647 | 90.272 | 261.286 | 262.108 | -| mask_rcnn_x101_vd_64x4d_fpn_1x | 115.461 | 115.756 | 102.04 | 296.066 | 293.62 | -| mask_rcnn_x101_vd_64x4d_fpn_2x | 107.144 | 107.29 | 97.275 | 267.636 | 267.577 | -| mask_rcnn_dcn_r101_vd_fpn_1x | 85.504 | 84.875 | 84.907 
| 225.202 | 226.585 | -| mask_rcnn_dcn_x101_vd_64x4d_fpn_1x | 129.937 | 129.934 | 127.804 | 326.786 | 326.161 | -| mask_rcnn_se154_vd_fpn_s1x | 214.188 | 139.807 | 121.516 | 440.391 | 439.727 | -| cascade_rcnn_r50_fpn_1x | 36.866 | 36.949 | 36.637 | 101.851 | 101.912 | -| cascade_mask_rcnn_r50_fpn_1x | 110.344 | 106.412 | 100.367 | 301.703 | 297.739 | -| cascade_rcnn_dcn_r50_fpn_1x | 40.412 | 39.58 | 39.853 | 110.346 | 110.077 | -| cascade_mask_rcnn_r50_fpn_gn_2x | 170.092 | 168.758 | 163.298 | 527.998 | 529.59 | -| cascade_rcnn_dcn_r101_vd_fpn_1x | 48.414 | 48.849 | 48.701 | 134.9 | 134.846 | -| cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x | 90.062 | 90.218 | 90.009 | 228.67 | 228.396 | -| retinanet_r101_fpn_1x | 55.59 | 54.636 | 48.489 | 90.394 | 83.951 | -| retinanet_r50_fpn_1x | 50.048 | 47.932 | 44.385 | 73.819 | 70.282 | -| retinanet_x101_vd_64x4d_fpn_1x | 83.329 | 83.446 | 70.76 | 145.936 | 146.168 | -| yolov3_darknet | 21.427 | 20.252 | 13.856 | 55.173 | 55.692 | -| yolov3_darknet_voc | 17.58 | 16.241 | 9.473 | 51.049 | 51.249 | -| yolov3_mobilenet_v1 | 12.869 | 11.834 | 9.408 | 24.887 | 21.352 | -| yolov3_mobilenet_v1_voc | 9.118 | 8.146 | 5.575 | 20.787 | 17.169 | -| yolov3_r34 | 14.914 | 14.125 | 11.176 | 20.798 | 20.822 | -| yolov3_r34_voc | 11.288 | 10.73 | 7.7 | 25.874 | 22.399 | -| ssd_mobilenet_v1_voc | 5.763 | 5.854 | 4.589 | 11.75 | 9.485 | -| ssd_vgg16_300 | 28.722 | 29.644 | 20.399 | 73.707 | 74.531 | -| ssd_vgg16_300_voc | 18.425 | 19.288 | 11.298 | 56.297 | 56.201 | -| ssd_vgg16_512 | 27.471 | 28.328 | 19.328 | 68.685 | 69.808 | -| ssd_vgg16_512_voc | 18.721 | 19.636 | 12.004 | 54.688 | 56.174 | - -1. RCNN系列模型Fluid-TensorRT速度相比Fluid预测没有优势,原因是: TensorRT仅支持定长输入,当前基于ResNet系列的RCNN模型,只有backbone部分采用了TensorRT子图计算,比较耗时的stage-5没有基于TensorRT计算。 Fluid对CNN模型也做了一系列的融合优化。后续TensorRT版本升级、或有其他优化策略时再更新数据。 -2. YOLO v3系列模型,Fluid-TensorRT相比Fluid预测加速5% - 10%不等。 -3. SSD和YOLOv3系列模型 TensorRT-FP16预测速度有一定的优势,加速约20% - 40%不等。具体如下图。 - -
- -
diff --git a/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md b/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md deleted file mode 100644 index 8e72b9af30183abf6c028ec2f4ee2ec695a29a6c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/CACascadeRCNN.md +++ /dev/null @@ -1,51 +0,0 @@ -# CACascade RCNN -## 简介 -CACascade RCNN是百度视觉技术部在Objects365 2019 Challenge上夺冠的最佳单模型之一,Objects365是在通用物体检测领域的一个全新的数据集,旨在促进对自然场景不同对象的检测研究。Objects365在63万张图像上标注了365个对象类,训练集中共有超过1000万个边界框。这里放出的是Full Track任务中最好的单模型之一。 - -
- -
- -## 方法描述 - -针对大规模物体检测算法的特点,我们提出了一种基于图片包含物体类别的数量的采样方式(Class Aware Sampling)。基于这种方式进行训练模型可以在更短的时间使模型收敛到更好的效果。 - -
- -
- -本次公布的最好单模型是一个基于Cascade RCNN的两阶段检测模型,在此基础上将Backbone替换为更加强大的SENet154模型,Deformable Conv模块以及更复杂二阶段网络结构,针对BatchSize比较小的情况增加了Group Normalization操作并同时使用了多尺度训练,最终达到了非常理想的效果。预训练模型先后分别在ImageNet和COCO数据集上进行了训练,其中在COCO数据集上训练时增加了Mask分支,其余结构与CACascade RCNN相同, 会在启动训练时自动下载。 - -## 使用方法 - -1.准备数据 - -数据需要通过[Objects365官方网站](https://www.objects365.org/download.html)进行申请下载,数据下载后将数据放置在dataset目录中。 -``` -${THIS REPO ROOT} - \--dataset - \-- objects365 - \-- annotations - |-- train.json - |-- val.json - \-- train - \-- val -``` - -2.启动训练模型 - -```bash -python tools/train.py -c configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn.yml -``` - -3.模型预测结果 - -| 模型 | 验证集 mAP | 下载链接 | -| :-----------------: | :--------: | :----------------------------------------------------------: | -| CACascadeRCNN SE154 | 31.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas_obj365.tar) | - -## 模型效果 - -
- -
diff --git a/PaddleCV/PaddleDetection/docs/CONFIG.md b/PaddleCV/PaddleDetection/docs/CONFIG.md deleted file mode 100644 index 3cba54eb546cfb648cc7b5bd2e135652a040b309..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/CONFIG.md +++ /dev/null @@ -1,206 +0,0 @@ -English | [简体中文](CONFIG_cn.md) - -# Config Pipeline - -## Introduction - -PaddleDetection takes a rather principled approach to configuration management. We aim to automate the configuration workflow and to reduce configuration errors. - - -## Rationale - -Presently, configuration in mainstream frameworks is usually dictionary based: the global config is simply a giant, loosely defined Python dictionary. - -This approach is error prone, e.g., misspelled or displaced keys may lead to serious errors in the training process, causing time loss and wasted resources. - -To avoid the common pitfalls, with automation and static analysis in mind, we propose a configuration design that is user friendly, easy to maintain and extensible. - - -## Design - -The design utilizes some of Python's reflection mechanism to extract configuration schematics from Python class definitions. - -To be specific, it extracts information from class constructor arguments, including names, docstrings, default values, data types (if type hints are available). - -This approach advocates modular and testable design, leading to a unified and extensible code base. - - -### API - -Most of the functionality is exposed in the `ppdet.core.workspace` module. - -- `register`: This decorator registers a class as a configurable module; it understands several special annotations in the class definition. - - `__category__`: For better organization, modules are classified into categories. - - `__inject__`: A list of constructor arguments, which are intended to take module instances as input, module instances will be created at runtime and injected. 
The corresponding configuration value can be a class name string, a serialized object, a config key pointing to a serialized object, or a dict (in which case the constructor needs to handle it, see example below). - - `__op__`: Shortcut for wrapping PaddlePaddle operators into callable objects; together with `__append_doc__` (extracting docstring from target PaddlePaddle operator automatically), this can be a real time saver. -- `serializable`: This decorator makes a class directly serializable in a yaml config file, by taking advantage of [pyyaml](https://pyyaml.org/wiki/PyYAMLDocumentation)'s serialization mechanism. -- `create`: Constructs a module instance according to global configuration. -- `load_config` and `merge_config`: Load a yaml file and merge config settings from the command line. - - -### Example - -Take the `RPNHead` module for example, it is composed of several PaddlePaddle operators. We first wrap those operators into classes, then pass in instances of these classes when instantiating the `RPNHead` module. - -```python -# excerpt from `ppdet/modeling/ops.py` -from ppdet.core.workspace import register, serializable - -# ... more operators - -@register -@serializable -class GenerateProposals(object): - # NOTE this class simply wraps a PaddlePaddle operator - __op__ = fluid.layers.generate_proposals - # NOTE docstring for args are extracted from PaddlePaddle OP - __append_doc__ = True - - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - -# ... 
more operators - -# excerpt from `ppdet/modeling/anchor_heads/rpn_head.py` -from ppdet.core.workspace import register -from ppdet.modeling.ops import AnchorGenerator, RPNTargetAssign, GenerateProposals - -@register -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator=AnchorGenerator().__dict__, - rpn_target_assign=RPNTargetAssign().__dict__, - train_proposal=GenerateProposals(12000, 2000).__dict__, - test_proposal=GenerateProposals().__dict__): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGenerator(**anchor_generator) - if isinstance(rpn_target_assign, dict): - self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign) - if isinstance(train_proposal, dict): - self.train_proposal = GenerateProposals(**train_proposal) - if isinstance(test_proposal, dict): - self.test_proposal = GenerateProposals(**test_proposal) -``` - -The corresponding(generated) YAML snippet is as follows, note this is the configuration in **FULL**, all the default values can be omitted. In case of the above example, all arguments have default value, meaning nothing is required in the config file. - -```yaml -RPNHead: - test_proposal: - eta: 1.0 - min_size: 0.1 - nms_thresh: 0.5 - post_nms_top_n: 1000 - pre_nms_top_n: 6000 - train_proposal: - eta: 1.0 - min_size: 0.1 - nms_thresh: 0.5 - post_nms_top_n: 2000 - pre_nms_top_n: 12000 - anchor_generator: - # ... - rpn_target_assign: - # ... 
-``` - -Example snippet that makes use of the `RPNHead` module. - -```python -from ppdet.core.workspace import load_config, merge_config, create - -load_config('some_config_file.yml') -merge_config(more_config_options_from_command_line) - -rpn_head = create('RPNHead') -# ... code that uses the created module! -``` - -Configuration file can also have serialized objects in it, denoted with `!`, for example - -```yaml -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 -``` - -[Complete config files](config_example/) of multiple detection architectures are provided, with a brief description of each parameter. - -## Requirements - -Two Python packages are used, both are optional. - -- [typeguard](https://github.com/agronholm/typeguard) is used for type checking in Python 3. -- [docstring\_parser](https://github.com/rr-/docstring_parser) is needed for docstring parsing. - -To install them, simply run: - -```shell -pip install typeguard http://github.com/willthefrog/docstring_parser/tarball/master -``` - - -## Tooling - -A small utility (`tools/configure.py`) is included to simplify the configuration process, it provides 4 commands to walk users through the configuration process: - -1. `list`: List currently registered modules by category, one can also specify which category to list with the `--category` flag. -2. `help`: Get help information for a module, including description, options, configuration template and example command line flags. -3. `analyze`: Check configuration file for missing/extraneous options, options with mismatched types (if type hint is given) and missing dependencies, it also highlights user provided values (overridden default values). -4. `generate`: Generate a configuration template for a given list of modules. 
By default it generates a complete configuration file, which can be quite verbose; if a `--minimal` flag is given, it generates a template that only contains non-optional settings. For example, to generate a configuration for Faster R-CNN architecture with `ResNet` backbone and `FPN`, run: - - ```shell - python tools/configure.py generate FasterRCNN ResNet RPNHead RoIAlign BBoxAssigner BBoxHead FasterRCNNTrainFeed FasterRCNNTestFeed LearningRate OptimizerBuilder - ``` - - For a minimal version, run: - - ```shell - python tools/configure.py --minimal generate FasterRCNN BBoxHead - ``` - - -## FAQ - -**Q:** There are some configuration options that are used by multiple modules (e.g., `num_classes`), how do I avoid duplication in config files? - -**A:** We provide a `__shared__` annotation for exactly this purpose; simply annotate like this: `__shared__ = ['num_classes']`. It works as follows: - -1. if `num_classes` is configured for a module in config file, it takes precedence. -2. if `num_classes` is not configured for a module but is present in the config file as a global key, its value will be used. -3. otherwise, the default value (`81`) will be used. 
diff --git a/PaddleCV/PaddleDetection/docs/CONFIG_cn.md b/PaddleCV/PaddleDetection/docs/CONFIG_cn.md deleted file mode 100644 index 8b7eaa653a65264db189fa88a125ce10b5a6f667..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/CONFIG_cn.md +++ /dev/null @@ -1,196 +0,0 @@ -# 配置模块 - -## 简介 - -为了使配置过程更加自动化并减少配置错误,PaddleDetection的配置管理采取了较为严谨的设计。 - - -## 设计思想 - -目前主流框架全局配置基本是一个Python dict,这种设计对配置的检查并不严格,拼写错误或者遗漏的配置项往往会造成训练过程中的严重错误,进而造成时间及资源的浪费。为了避免这些陷阱,从自动化和静态分析的原则出发,PaddleDetection采用了一种用户友好、 易于维护和扩展的配置设计。 - - -## 基本设计 - -利用Python的反射机制,PaddleDection的配置系统从Python类的构造函数抽取多种信息 - 如参数名、初始值、参数注释、数据类型(如果给出type hint)- 来作为配置规则。 这种设计便于设计的模块化,提升可测试性及扩展性。 - - -### API - -配置系统的大多数功能由 `ppdet.core.workspace` 模块提供 - -- `register`: 装饰器,将类注册为可配置模块;能够识别类定义中的一些特殊标注。 - - `__category__`: 为便于组织,模块可以分为不同类别。 - - `__inject__`: 如果模块由多个子模块组成,可以这些子模块实例作为构造函数的参数注入。对应的默认值及配置项可以是类名字符串,yaml序列化的对象,指向序列化对象的配置键值或者Python dict(构造函数需要对其作出处理,参见下面的例子)。 - - `__op__`: 配合 `__append_doc__` (抽取目标OP的 注释)使用,可以方便快速的封装PaddlePaddle底层OP。 -- `serializable`: 装饰器,利用 [pyyaml](https://pyyaml.org/wiki/PyYAMLDocumentation) 的序列化机制,可以直接将一个类实例序列化及反序列化。 -- `create`: 根据全局配置构造一个模块实例。 -- `load_config` and `merge_config`: 加载yaml文件,合并命令行提供的配置项。 - - -### 示例 - -以 `RPNHead` 模块为例,该模块包含多个PaddlePaddle OP,先将这些OP封装成类,并将其实例在构造 `RPNHead` 时注入。 - -```python -# excerpt from `ppdet/modeling/ops.py` -from ppdet.core.workspace import register, serializable - -# ... 
more operators - -@register -@serializable -class GenerateProposals(object): - # NOTE this class simply wraps a PaddlePaddle operator - __op__ = fluid.layers.generate_proposals - # NOTE docstring for args are extracted from PaddlePaddle OP - __append_doc__ = True - - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - -# ... more operators - -# excerpt from `ppdet/modeling/anchor_heads/rpn_head.py` -from ppdet.core.workspace import register -from ppdet.modeling.ops import AnchorGenerator, RPNTargetAssign, GenerateProposals - -@register -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator=AnchorGenerator().__dict__, - rpn_target_assign=RPNTargetAssign().__dict__, - train_proposal=GenerateProposals(12000, 2000).__dict__, - test_proposal=GenerateProposals().__dict__): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGenerator(**anchor_generator) - if isinstance(rpn_target_assign, dict): - self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign) - if isinstance(train_proposal, dict): - self.train_proposal = GenerateProposals(**train_proposal) - if isinstance(test_proposal, dict): - self.test_proposal = 
GenerateProposals(**test_proposal) -``` - -对应的yaml配置如下,请注意这里给出的是 **完整** 配置,其中所有默认值配置项都可以省略。上面的例子中的模块所有的构造函数参数都提供了默认值,因此配置文件中可以完全略过其配置。 - -```yaml -RPNHead: - test_proposal: - eta: 1.0 - min_size: 0.1 - nms_thresh: 0.5 - post_nms_top_n: 1000 - pre_nms_top_n: 6000 - train_proposal: - eta: 1.0 - min_size: 0.1 - nms_thresh: 0.5 - post_nms_top_n: 2000 - pre_nms_top_n: 12000 - anchor_generator: - # ... - rpn_target_assign: - # ... -``` - -`RPNHead` 模块实际使用代码示例。 - -```python -from ppdet.core.workspace import load_config, merge_config, create - -load_config('some_config_file.yml') -merge_config(more_config_options_from_command_line) - -rpn_head = create('RPNHead') -# ... code that use the created module! -``` - -配置文件用可以直接序列化模块实例,用 `!` 标示,如 - -```yaml -LearningRate: - base_lr: 0.01 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [60000, 80000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 -``` - -[示例配置文件](config_example/)中给出了多种检测结构的完整配置文件,以及其中各个超参的简要说明。 - -## 安装依赖 - -配置系统用到两个Python包,均为可选安装。 - -- [typeguard](https://github.com/agronholm/typeguard) 在Python 3中用来进行数据类型验证。 -- [docstring\_parser](https://github.com/rr-/docstring_parser) 用来解析注释。 - -如需安装,运行下面命令即可。 - -```shell -pip install typeguard http://github.com/willthefrog/docstring_parser/tarball/master -``` - - -## 相关工具 - -为了方便用户配置,PaddleDection提供了一个工具 (`tools/configure.py`), 共支持四个子命令: - -1. `list`: 列出当前已注册的模块,如需列出具体类别的模块,可以使用 `--category` 指定。 -2. `help`: 显示指定模块的帮助信息,如描述,配置项,配置文件模板及命令行示例。 -3. `analyze`: 检查配置文件中的缺少或者多余的配置项以及依赖缺失,如果给出type hint, 还可以检查配置项中错误的数据类型。非默认配置也会高亮显示。 -4. 
`generate`: 根据给出的模块列表生成配置文件,默认生成完整配置,如果指定 `--minimal` ,生成最小配置,即省略所有默认配置项。例如,执行下列命令可以生成Faster R-CNN (`ResNet` backbone + `FPN`) 架构的配置文件: - - ```shell - python tools/configure.py generate FasterRCNN ResNet RPNHead RoIAlign BBoxAssigner BBoxHead FasterRCNNTrainFeed FasterRCNNTestFeed LearningRate OptimizerBuilder - ``` - - 如需最小配置,运行: - - ```shell - python tools/configure.py --minimal generate FasterRCNN BBoxHead - ``` - - -## FAQ - -**Q:** 某些配置项会在多个模块中用到(如 `num_classes`),如何避免在配置文件中多次重复设置? - -**A:** 框架提供了 `__shared__` 标记来实现配置的共享,用户可以标记参数,如 `__shared__ = ['num_classes']` ,配置数值作用规则如下: - -1. 如果模块配置中提供了 `num_classes` ,会优先使用其数值。 -2. 如果模块配置中未提供 `num_classes` ,但配置文件中存在全局键值,那么会使用全局键值。 -3. 两者均未配置的情况下,将使用默认值(`81`)。 diff --git a/PaddleCV/PaddleDetection/docs/DATA.md b/PaddleCV/PaddleDetection/docs/DATA.md deleted file mode 100644 index 466405022c2a6d076844ffb73ebd74565240ca55..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/DATA.md +++ /dev/null @@ -1,235 +0,0 @@ -English | [简体中文](DATA_cn.md) - -# Data Pipeline - -## Introduction - -The data pipeline is responsible for loading and converting data. Each -resulting data sample is a tuple of np.ndarrays. -For example, Faster R-CNN training uses samples of this format: `[(im, -im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`. - -### Implementation - -The data pipeline consists of four sub-systems: data parsing, image -pre-processing, data conversion and data feeding APIs. - -Data samples are collected to form `data.Dataset`s, usually 3 sets are -needed for training, validation, and testing respectively. - -First, `data.source` loads the data files into memory, then -`data.transform` processes them, and lastly, the batched samples -are fetched by `data.Reader`. - -Sub-systems details: -1. Data parsing -Parses various data sources and creates `data.Dataset` instances. 
Currently, -following data sources are supported: - -- COCO data source - -Loads `COCO` type datasets with directory structures like this: - - ``` - dataset/coco/ - ├── annotations - │ ├── instances_train2014.json - │ ├── instances_train2017.json - │ ├── instances_val2014.json - │ ├── instances_val2017.json - │ | ... - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - │ | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - │ | ... - | ... - ``` - -- Pascal VOC data source - -Loads `Pascal VOC` like datasets with directory structure like this: - - ``` - dataset/voc/ - ├── train.txt - ├── val.txt - ├── test.txt - ├── label_list.txt (optional) - ├── VOCdevkit/VOC2007 - │ ├── Annotations - │ ├── 001789.xml - │ | ... - │ ├── JPEGImages - │ ├── 001789.xml - │ | ... - │ ├── ImageSets - │ | ... - ├── VOCdevkit/VOC2012 - │ ├── Annotations - │ ├── 003876.xml - │ | ... - │ ├── JPEGImages - │ ├── 003876.xml - │ | ... - │ ├── ImageSets - │ | ... - | ... - ``` - -**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt` -of Pascal VOC dataset will be read, otherwise, `label_list.txt` is unnecessary and -the default Pascal VOC label list which defined in -[voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used. - -- Roidb data source -A generalized data source serialized as pickle files, which have the following -structure: -```python -(records, cname2id) -# `cname2id` is a `dict` which maps category name to class IDs -# and `records` is a list of dict of this structure: -{ - 'im_file': im_fname, # image file name - 'im_id': im_id, # image ID - 'h': im_h, # height of image - 'w': im_w, # width of image - 'is_crowd': is_crowd, # crowd marker - 'gt_class': gt_class, # ground truth class - 'gt_bbox': gt_bbox, # ground truth bounding box - 'gt_poly': gt_poly, # ground truth segmentation -} -``` - -We provide a tool to generate roidb data sources. 
To convert `COCO` or `VOC` -like dataset, run this command: -```sh -# --type: the type of original data (xml or json) -# --annotation: the path of file, which contains the name of annotation files -# --save-dir: the save path -# --samples: the number of samples (default is -1, which mean all datas in dataset) -python ./ppdet/data/tools/generate_data_for_training.py - --type=json \ - --annotation=./annotations/instances_val2017.json \ - --save-dir=./roidb \ - --samples=-1 -``` - - 2. Image preprocessing -the `data.transform.operator` module provides operations such as image -decoding, expanding, cropping, etc. Multiple operators are combined to form -larger processing pipelines. - - 3. Data transformer -Transform a `data.Dataset` to achieve various desired effects, Notably: the -`data.transform.paralle_map` transformer accelerates image processing with -multi-threads or multi-processes. More transformers can be found in -`data.transform.transformer`. - - 4. Data feeding apis -To facilitate data pipeline building, we combine multiple `data.Dataset` to -form a `data.Reader` which can provide data for training, validation and -testing respectively. Users can simply call `Reader.[train|eval|infer]` to get -the corresponding data stream. Many aspect of the `Reader`, such as storage -location, preprocessing pipeline, acceleration mode can be configured with yaml -files. - -### APIs - -The main APIs are as follows: - -1. Data parsing - - - `source/coco_loader.py`: COCO dataset parser. [source](../ppdet/data/source/coco_loader.py) - - `source/voc_loader.py`: Pascal VOC dataset parser. [source](../ppdet/data/source/voc_loader.py) - [Note] To use a non-default label list for VOC datasets, a `label_list.txt` - file is needed, one can use the provided label list - (`data/pascalvoc/ImageSets/Main/label_list.txt`) or generate a custom one (with `tools/generate_data_for_training.py`). 
Also, `use_default_label` option should - be set to `false` in the configuration file - - `source/loader.py`: Roidb dataset parser. [source](../ppdet/data/source/loader.py) - -2. Operator - `transform/operators.py`: Contains a variety of data augmentation methods, including: -- `DecodeImage`: Read images in RGB format. -- `RandomFlipImage`: Horizontal flip. -- `RandomDistort`: Distort brightness, contrast, saturation, and hue. -- `ResizeImage`: Resize image with interpolation. -- `RandomInterpImage`: Use a random interpolation method to resize the image. -- `CropImage`: Crop image with respect to different scale, aspect ratio, and overlap. -- `ExpandImage`: Pad image to a larger size, padding filled with mean image value. -- `NormalizeImage`: Normalize image pixel values. -- `NormalizeBox`: Normalize the bounding box. -- `Permute`: Arrange the channels of the image and optionally convert image to BGR format. -- `MixupImage`: Mixup two images with given fraction[1](#mix). - -[1] Please refer to [this paper](https://arxiv.org/pdf/1710.09412.pdf)。 - -`transform/arrange_sample.py`: Assemble the data samples needed by different models. -3. Transformer -`transform/post_map.py`: Transformations that operates on whole batches, mainly for: -- Padding whole batch to given stride values -- Resize images to Multi-scales -- Randomly adjust the image size of the batch data -`transform/transformer.py`: Data filtering batching. -`transform/parallel_map.py`: Accelerate data processing with multi-threads/multi-processes. -4. Reader -`reader.py`: Combine source and transforms, return batch data according to `max_iter`. -`data_feed.py`: Configure default parameters for `reader.py`. - - -### Usage - -#### Canned Datasets - -Preset for common datasets, e.g., `COCO` and `Pascal Voc` are included. In -most cases, user can simply use these canned dataset as is. Moreover, the -whole data pipeline is fully customizable through the yaml configuration files. 
- -#### Custom Datasets - -- Option 1: Convert the dataset to COCO format. -```sh - # a small utility (`tools/x2coco.py`) is provided to convert - # Labelme-annotated dataset or cityscape dataset to COCO format. - python ./ppdet/data/tools/x2coco.py --dataset_type labelme \ - --json_input_dir ./labelme_annos/ \ - --image_input_dir ./labelme_imgs/ \ - --output_dir ./cocome/ \ - --train_proportion 0.8 \ - --val_proportion 0.2 \ - --test_proportion 0.0 - # --dataset_type: The data format that needs to be converted. Currently supported are: 'labelme' and 'cityscape' - # --json_input_dir: The path of json files which are annotated by Labelme. - # --image_input_dir: The path of images. - # --output_dir: The path of converted COCO dataset. - # --train_proportion: The train proportion of annotation data. - # --val_proportion: The validation proportion of annotation data. - # --test_proportion: The inference proportion of annotation data. -``` - -- Option 2: - -1. Add `source/XX_loader.py` and implement the `load` function, following the - example of `source/coco_loader.py` and `source/voc_loader.py`. -2. Modify the `load` function in `source/loader.py` to make use of the newly - added data loader. -3. Modify `/source/__init__.py` accordingly. -```python -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: - source_type = 'RoiDbSource' -# Replace the above code with the following code: -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: - source_type = 'RoiDbSource' -``` -4. In the configuration file, define the `type` of `dataset` as `XXSource`. - -#### How to add data pre-processing? - -- To add pre-processing operation for a single image, refer to the classes in - `transform/operators.py`, and implement the desired transformation with a new - class. -- To add pre-processing for a batch, one needs to modify the `build_post_map` - function in `transform/post_map.py`. 
diff --git a/PaddleCV/PaddleDetection/docs/DATA_cn.md b/PaddleCV/PaddleDetection/docs/DATA_cn.md deleted file mode 100644 index 332e81b0d313e9e8072979667b614de6924cec45..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/DATA_cn.md +++ /dev/null @@ -1,212 +0,0 @@ -# 数据模块 - -## 介绍 -本模块是一个Python模块,用于加载数据并将其转换成适用于检测模型的训练、验证、测试所需要的格式——由多个np.ndarray组成的tuple数组,例如用于Faster R-CNN模型的训练数据格式为:`[(im, im_info, im_id, gt_bbox, gt_class, is_crowd), (...)]`。 - -### 实现 -该模块内部可分为4个子功能:数据解析、图片预处理、数据转换和数据获取接口。 - -我们采用`data.Dataset`表示一份数据,比如`COCO`数据包含3份数据,分别用于训练、验证和测试。原始数据存储与文件中,通过`data.source`加载到内存,然后使用`data.transform`对数据进行处理转换,最终通过`data.Reader`的接口可以获得用于训练、验证和测试的batch数据。 - -子功能介绍: - -1. 数据解析 - -数据解析得到的是`data.Dataset`,实现逻辑位于`data.source`中。通过它可以实现解析不同格式的数据集,已支持的数据源包括: - -- COCO数据源 - -该数据集目前分为COCO2014和COCO2017,主要由json文件和image文件组成,其组织结构如下所示: - - ``` - dataset/coco/ - ├── annotations - │ ├── instances_train2014.json - │ ├── instances_train2017.json - │ ├── instances_val2014.json - │ ├── instances_val2017.json - │ | ... - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - │ | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - │ | ... - | ... - ``` - - -- Pascal VOC数据源 - -该数据集目前分为VOC2007和VOC2012,主要由xml文件和image文件组成,其组织结构如下所示: - - ``` - dataset/voc/ - ├── train.txt - ├── val.txt - ├── test.txt - ├── label_list.txt (optional) - ├── VOCdevkit/VOC2007 - │ ├── Annotations - │ ├── 001789.xml - │ | ... - │ ├── JPEGImages - │ ├── 001789.xml - │ | ... - │ ├── ImageSets - │ | ... - ├── VOCdevkit/VOC2012 - │ ├── Annotations - │ ├── 003876.xml - │ | ... - │ ├── JPEGImages - │ ├── 003876.xml - │ | ... - │ ├── ImageSets - │ | ... - | ... 
- ``` - -**说明:** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt` -中读取类别列表,反之则可以没有`label_list.txt`文件,检测库会使用Pascal VOC数据集的默 -认类别列表,默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py) - -- Roidb数据源 - 该数据集主要由COCO数据集和Pascal VOC数据集转换而成的pickle文件,包含一个dict,而dict中只包含一个命名为‘records’的list(可能还有一个命名为‘cname2cid’的字典),其内容如下所示: -```python -(records, catname2clsid) -'records'是一个list并且它的结构如下: -{ - 'im_file': im_fname, # 图像文件名 - 'im_id': im_id, # 图像id - 'h': im_h, # 图像高度 - 'w': im_w, # 图像宽度 - 'is_crowd': is_crowd, # 是否重叠 - 'gt_class': gt_class, # 真实框类别 - 'gt_bbox': gt_bbox, # 真实框坐标 - 'gt_poly': gt_poly, # 多边形坐标 -} -'cname2id'是一个dict,保存了类别名到id的映射 - -``` -我们在`./tools/`中提供了一个生成roidb数据集的代码,可以通过下面命令实现该功能。 -``` -# --type: 原始数据集的类别(只能是xml或者json) -# --annotation: 一个包含所需标注文件名的文件的路径 -# --save-dir: 保存路径 -# --samples: sample的个数(默认是-1,代表使用所有sample) -python ./ppdet/data/tools/generate_data_for_training.py - --type=json \ - --annotation=./annotations/instances_val2017.json \ - --save-dir=./roidb \ - --samples=-1 -``` - 2. 图片预处理 - 图片预处理通过包括图片解码、缩放、裁剪等操作,我们采用`data.transform.operator`算子的方式来统一实现,这样能方便扩展。此外,多个算子还可以组合形成复杂的处理流程, 并被`data.transformer`中的转换器使用,比如多线程完成一个复杂的预处理流程。 - - 3. 数据转换器 - 数据转换器的功能是完成对某个`data.Dataset`进行转换处理,从而得到一个新的`data.Dataset`。我们采用装饰器模式实现各种不同的`data.transform.transformer`。比如用于多进程预处理的`dataset.transform.paralle_map`转换器。 - - 4. 数据获取接口 - 为方便训练时的数据获取,我们将多个`data.Dataset`组合在一起构成一个`data.Reader`为用户提供数据,用户只需要调用`Reader.[train|eval|infer]`即可获得对应的数据流。`Reader`支持yaml文件配置数据地址、预处理过程、加速方式等。 - -### APIs - -主要的APIs如下: - - -1. 
数据解析 - - - `source/coco_loader.py`:用于解析COCO数据集。[详见代码](../ppdet/data/source/coco_loader.py) - - `source/voc_loader.py`:用于解析Pascal VOC数据集。[详见代码](../ppdet/data/source/voc_loader.py) - [注意]在使用VOC数据集时,若不使用默认的label列表,则需要先使用`tools/generate_data_for_training.py`生成`label_list.txt`(使用方式与数据解析中的roidb数据集获取过程一致),或提供`label_list.txt`放置于`data/pascalvoc/ImageSets/Main`中;同时在配置文件中设置参数`use_default_label`为`true`。 - - `source/loader.py`:用于解析Roidb数据集。[详见代码](../ppdet/data/source/loader.py) - -2. 算子 - `transform/operators.py`:包含多种数据增强方式,主要包括: - -``` python -RandomFlipImage:水平翻转。 -RandomDistort:随机扰动图片亮度、对比度、饱和度和色相。 -ResizeImage:根据特定的插值方式调整图像大小。 -RandomInterpImage:使用随机的插值方式调整图像大小。 -CropImage:根据缩放比例、长宽比例两个参数生成若干候选框,再依据这些候选框和标注框的面积交并比(IoU)挑选出符合要求的裁剪结果。 -ExpandImage:将原始图片放进一张使用像素均值填充(随后会在减均值操作中减掉)的扩张图中,再对此图进行裁剪、缩放和翻转。 -DecodeImage:以RGB格式读取图像。 -Permute:对图像的通道进行排列并转为BGR格式。 -NormalizeImage:对图像像素值进行归一化。 -NormalizeBox:对bounding box进行归一化。 -MixupImage:按比例叠加两张图像。 -``` -[注意]:Mixup的操作可参考[论文](https://arxiv.org/pdf/1710.09412.pdf)。 - -`transform/arrange_sample.py`:实现对输入网络数据的排序。 -3. 转换 -`transform/post_map.py`:用于完成批数据的预处理操作,其主要包括: - -``` python -随机调整批数据的图像大小 -多尺度调整图像大小 -padding操作 -``` -`transform/transformer.py`:用于过滤无用的数据,并返回批数据。 -`transform/parallel_map.py`:用于实现加速。 -4. 读取 -`reader.py`:用于组合source和transformer操作,根据`max_iter`返回batch数据。 -`data_feed.py`: 用于配置 `reader.py`中所需的默认参数. - - - - -### 使用 -#### 常规使用 -结合yaml文件中的配置信息,完成本模块的功能。yaml文件的使用可以参见配置文件部分。 - - - 读取用于训练的数据 - -``` python -ccfg = load_cfg('./config.yml') -coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1) -``` -#### 如何使用自定义数据集? 
- -- 选择1:将数据集转换为COCO格式。 -``` - # 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集 - python ./ppdet/data/tools/x2coco.py --dataset_type labelme - --json_input_dir ./labelme_annos/ - --image_input_dir ./labelme_imgs/ - --output_dir ./cocome/ - --train_proportion 0.8 - --val_proportion 0.2 - --test_proportion 0.0 - # --dataset_type:需要转换的数据格式,目前支持:’labelme‘和’cityscape‘ - # --json_input_dir:使用labelme标注的json文件所在文件夹 - # --image_input_dir:图像文件所在文件夹 - # --output_dir:转换后的COCO格式数据集存放位置 - # --train_proportion:标注数据中用于train的比例 - # --val_proportion:标注数据中用于validation的比例 - # --test_proportion: 标注数据中用于infer的比例 -``` -- 选择2: - -1. 仿照`./source/coco_loader.py`和`./source/voc_loader.py`,添加`./source/XX_loader.py`并实现`load`函数。 -2. 在`./source/loader.py`的`load`函数中添加使用`./source/XX_loader.py`的入口。 -3. 修改`./source/__init__.py`: - - -```python -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: - source_type = 'RoiDbSource' -# 将上述代码替换为如下代码: -if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource', 'XXSource']: - source_type = 'RoiDbSource' -``` - -4. 在配置文件中修改`dataset`下的`type`为`XXSource`。 - -#### 如何增加数据预处理? 
-- 若增加单张图像的增强预处理,可在`transform/operators.py`中参考每个类的代码,新建一个类来实现新的数据增强;同时在配置文件中增加该预处理。 -- 若增加单个batch的图像预处理,可在`transform/post_map.py`中参考`build_post_map`中每个函数的代码,新建一个内部函数来实现新的批数据预处理;同时在配置文件中增加该预处理。 diff --git a/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md b/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md deleted file mode 100644 index 614d87e29c5e5d5685f64c9ee78cf7d1d5192d09..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/EXPORT_MODEL.md +++ /dev/null @@ -1,48 +0,0 @@ -# 模型导出 - -训练得到一个满足要求的模型后,如果想要将该模型接入到C++预测库或者Serving服务,需要通过`tools/export_model.py`导出该模型。 - -## 启动参数说明 - -| FLAG | 用途 | 默认值 | 备注 | -|:--------------:|:--------------:|:------------:|:-----------------------------------------:| -| -c | 指定配置文件 | None | | -| --output_dir | 模型保存路径 | `./output` | 模型默认保存在`output/配置文件名/`路径下 | - -## 使用示例 - -使用[训练/评估/推断](GETTING_STARTED_cn.md)中训练得到的模型进行试用,脚本如下 - -```bash -# 导出FasterRCNN模型, 模型中data层默认的shape为3x800x1333 -python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \ - --output_dir=./inference_model \ - -o weights=output/faster_rcnn_r50_1x/model_final \ - -``` - -预测模型会导出到`inference_model/faster_rcnn_r50_1x`目录下,模型名和参数名分别为`__model__`和`__params__`。 - -## 设置导出模型的输入大小 - -使用Fluid-TensorRT进行预测时,由于<=TensorRT 5.1的版本仅支持定长输入,保存模型的`data`层的图片大小需要和实际输入图片大小一致。而Fluid C++预测引擎没有此限制。可通过设置TestFeed的`image_shape`可以修改保存模型中的输入图片大小。示例如下: - -```bash -# 导出FasterRCNN模型,输入是3x640x640 -python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \ - --output_dir=./inference_model \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \ - FasterRCNNTestFeed.image_shape=[3,640,640] - -# 导出YOLOv3模型,输入是3x320x320 -python tools/export_model.py -c configs/yolov3_darknet.yml \ - --output_dir=./inference_model \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar \ - YoloTestFeed.image_shape=[3,320,320] - -# 导出SSD模型,输入是3x300x300 -python tools/export_model.py -c configs/ssd/ssd_mobilenet_v1_voc.yml \ - 
--output_dir=./inference_model \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar \ - SSDTestFeed.image_shape=[3,300,300] -``` diff --git a/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md b/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md deleted file mode 100644 index 199b343ed2a80a1bf80fcf3d3206fbdf8413551f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/GETTING_STARTED.md +++ /dev/null @@ -1,187 +0,0 @@ -English | [简体中文](GETTING_STARTED_cn.md) - -# Getting Started - -For setting up the running environment, please refer to [installation -instructions](INSTALL.md). - - -## Training/Evaluation/Inference - -PaddleDetection provides scripts for training, evaluation and inference with various features according to different configurations. - -```bash -# set PYTHONPATH -export PYTHONPATH=$PYTHONPATH:. -# training in single-GPU and multi-GPU. specify different GPU numbers by CUDA_VISIBLE_DEVICES -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python tools/train.py -c configs/faster_rcnn_r50_1x.yml -# GPU evaluation -export CUDA_VISIBLE_DEVICES=0 -python tools/eval.py -c configs/faster_rcnn_r50_1x.yml -# Inference -python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg -``` - -### Optional argument list - -list below can be viewed by `--help` - -| FLAG | script supported | description | default | remark | -| :----------------------: | :------------: | :---------------: | :--------------: | :-----------------: | -| -c | ALL | Select config file | None | **The whole description of configure can refer to [config_example](config_example)** | -| -o | ALL | Set parameters in configure file | None | `-o` has higher priority to file configured by `-c`. 
Such as `-o use_gpu=False max_iter=10000` | -| -r/--resume_checkpoint | train | Checkpoint path for resuming training | None | `-r output/faster_rcnn_r50_1x/10000` | -| --eval | train | Whether to perform evaluation in training | False | | -| --output_eval | train/eval | json path in evaluation | current path | `--output_eval ./json_result` | -| -d/--dataset_dir | train/eval | path for dataset, same as dataset_dir in configs | None | `-d dataset/coco` | -| --fp16 | train | Whether to enable mixed precision training | False | GPU training is required | -| --loss_scale | train | Loss scaling factor for mixed precision training | 8.0 | enable when `--fp16` is True | -| --json_eval | eval | Whether to evaluate with an existing bbox.json or mask.json | False | json path is set in `--output_eval` | -| --output_dir | infer | Directory for storing the output visualization files | `./output` | `--output_dir output` | -| --draw_threshold | infer | Threshold to reserve the result for visualization | 0.5 | `--draw_threshold 0.7` | -| --infer_dir | infer | Directory for images to perform inference on | None | | -| --infer_img | infer | Image path | None | higher priority over --infer_dir | -| --use_tb | train/infer | Whether to record the data with [tb-paddle](https://github.com/linshuliang/tb-paddle), so as to display in Tensorboard | False | | -| --tb\_log_dir | train/infer | tb-paddle logging directory for image | train:`tb_log_dir/scalar` infer: `tb_log_dir/image` | | - - -## Examples - -### Training - -- Perform evaluation in training - - ```bash - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval - ``` - - Perform training and evaluation alternately and evaluate at each snapshot_iter. Meanwhile, the best model with highest mAP is saved at each `snapshot_iter` which has the same path as `model_final`. - - If evaluation dataset is large, we suggest decreasing evaluation times or evaluating after training. 
- - Fine-tune other task - - When using pre-trained model to fine-tune other task, two methods can be used: - - 1. The excluded pre-trained parameters can be set by `finetune_exclude_pretrained_params` in YAML config - 2. Set -o finetune\_exclude\_pretrained_params in the arguments. - - ```bash - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \ - -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \ - finetune_exclude_pretrained_params=['cls_score','bbox_pred'] - ``` - -##### NOTES - -- `CUDA_VISIBLE_DEVICES` can specify different gpu numbers. Such as: `export CUDA_VISIBLE_DEVICES=0,1,2,3`. GPU calculation rules can be found in the [FAQ](#faq) -- Dataset will be downloaded automatically and cached in `~/.cache/paddle/dataset` if not found locally. -- Pretrained model is downloaded automatically and cached in `~/.cache/paddle/weights`. -- Checkpoints are saved in `output` by default, and can be revised from save_dir in configure files. -- RCNN models training on CPU is not supported on PaddlePaddle<=1.5.1 and will be fixed in a later version. - - -### Mixed Precision Training - -Mixed precision training can be enabled with `--fp16` flag. Currently Faster-FPN, Mask-FPN and Yolov3 have been verified to be working with little to no loss of precision (less than 0.2 mAP) - -To speed up mixed precision training, it is recommended to train in multi-process mode, for example - -```bash -python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 tools/train.py --fp16 -c configs/faster_rcnn_r50_fpn_1x.yml -``` - -If loss becomes `NaN` during training, try tweaking the `--loss_scale` value. Please refer to the Nvidia [documentation](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#mptrain) on mixed precision training for details. - -Also, please note mixed precision training currently requires changing `norm_type` from `affine_channel` to `bn`. 
- - - -### Evaluation - -- Evaluate by specified weights path and dataset path - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \ - -d dataset/coco - ``` - - The path of model to be evaluated can be both local path and link in [MODEL_ZOO](MODEL_ZOO_cn.md). - -- Evaluate with json - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python tools/eval.py -c configs/faster_rcnn_r50_1x.yml \ - --json_eval \ - --output_eval evaluation/ - ``` - - The json file must be named bbox.json or mask.json, placed in the `evaluation/` directory. - -#### NOTES - -- Multi-GPU evaluation for R-CNN and SSD models is not supported at the -moment, but it is a planned feature - - -### Inference - -- Output specified directory && Set up threshold - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \ - --infer_img=demo/000000570688.jpg \ - --output_dir=infer_output/ \ - --draw_threshold=0.5 \ - -o weights=output/faster_rcnn_r50_1x/model_final \ - --use_tb=True - ``` - - `--draw_threshold` is an optional argument. Default is 0.5. - Different thresholds will produce different results depending on the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659). - - -- Export model - - ```bash - python tools/export_model.py -c configs/faster_rcnn_r50_1x.yml \ - --output_dir=inference_model \ - -o weights=output/faster_rcnn_r50_1x/model_final \ - FasterRCNNTestFeed.image_shape=[3,800,1333] - ``` - - Save the inference model with `tools/export_model.py`; the saved model can be loaded by the PaddlePaddle prediction library. - -## FAQ - -**Q:** Why do I get `NaN` loss values during single GPU training?
-**A:** The default learning rate is tuned for multi-GPU training (8x GPUs), so it must -be adapted for single GPU training accordingly (e.g., divide by 8). -The calculation rules are as follows; they are equivalent:
- - -| GPU number | Learning rate | Max_iters | Milestones | -| :---------: | :------------: | :-------: | :--------------: | -| 2 | 0.0025 | 720000 | [480000, 640000] | -| 4 | 0.005 | 360000 | [240000, 320000] | -| 8 | 0.01 | 180000 | [120000, 160000] | - - -**Q:** How to reduce GPU memory usage?
-**A:** Setting environment variable FLAGS_conv_workspace_size_limit to a smaller -number can reduce GPU memory footprint without affecting training speed. -Take Mask-RCNN (R50) as example, by setting `export FLAGS_conv_workspace_size_limit=512`, -batch size could reach 4 per GPU (Tesla V100 16GB). - - -**Q:** How to change data preprocessing?
-**A:** Set `sample_transform` in configuration. Note that **the whole transforms** need to be added in configuration. -For example, `DecodeImage`, `NormalizeImage` and `Permute` in RCNN models. For detail description, please refer -to [config_example](config_example). diff --git a/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md b/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md deleted file mode 100644 index 15cb8cdb239e72f204d598adbf78627000cb5bec..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/GETTING_STARTED_cn.md +++ /dev/null @@ -1,173 +0,0 @@ -# 开始 - -关于配置运行环境,请参考[安装指南](INSTALL_cn.md) - - -## 训练/评估/推断 - -PaddleDetection提供了训练/评估/推断三个功能的使用脚本,支持通过不同可选参数实现特定功能 - -```bash -# 设置PYTHONPATH路径 -export PYTHONPATH=$PYTHONPATH:. -# GPU训练 支持单卡,多卡训练,通过CUDA_VISIBLE_DEVICES指定卡号 -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python tools/train.py -c configs/faster_rcnn_r50_1x.yml -# GPU评估 -export CUDA_VISIBLE_DEVICES=0 -python tools/eval.py -c configs/faster_rcnn_r50_1x.yml -# 推断 -python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/000000570688.jpg -``` - -### 可选参数列表 - -以下列表可以通过`--help`查看 - -| FLAG | 支持脚本 | 用途 | 默认值 | 备注 | -| :----------------------: | :------------: | :---------------: | :--------------: | :-----------------: | -| -c | ALL | 指定配置文件 | None | **完整配置说明请参考[配置案例](config_example)** | -| -o | ALL | 设置配置文件里的参数内容 | None | 使用-o配置相较于-c选择的配置文件具有更高的优先级。例如:`-o use_gpu=False max_iter=10000` | -| -r/--resume_checkpoint | train | 从某一检查点恢复训练 | None | `-r output/faster_rcnn_r50_1x/10000` | -| --eval | train | 是否边训练边测试 | False | | -| --output_eval | train/eval | 编辑评测保存json路径 | 当前路径 | `--output_eval ./json_result` | -| -d/--dataset_dir | train/eval | 数据集路径, 同配置文件里的dataset_dir | None | `-d dataset/coco` | -| --fp16 | train | 是否使用混合精度训练模式 | False | 需使用GPU训练 | -| --loss_scale | train | 设置混合精度训练模式中损失值的缩放比例 | 8.0 | 需先开启`--fp16`后使用 | -| --json_eval | eval | 是否通过已存在的bbox.json或者mask.json进行评估 | False | json文件路径在`--output_eval`中设置 
| -| --output_dir | infer | 输出推断后可视化文件 | `./output` | `--output_dir output` | -| --draw_threshold | infer | 可视化时分数阈值 | 0.5 | `--draw_threshold 0.7` | -| --infer_dir | infer | 用于推断的图片文件夹路径 | None | | -| --infer_img | infer | 用于推断的图片路径 | None | 相较于`--infer_dir`具有更高优先级 | -| --use_tb | train/infer | 是否使用[tb-paddle](https://github.com/linshuliang/tb-paddle)记录数据,进而在TensorBoard中显示 | False | | -| --tb\_log_dir | train/infer | 指定 tb-paddle 记录数据的存储路径 | train:`tb_log_dir/scalar` infer: `tb_log_dir/image` | | - - -## 使用示例 - -### 模型训练 - -- 边训练边测试 - - ```bash - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval -d dataset/coco - ``` - - 在训练中交替执行评估, 评估在每个snapshot\_iter时开始。每次评估后还会评出最佳mAP模型保存到`best_model`文件夹下。 - - 如果验证集很大,测试将会比较耗时,建议减少评估次数,或训练完再进行评估。 - - -- Fine-tune其他任务 - - 使用预训练模型fine-tune其他任务时,可采用如下两种方式: - - 1. 在YAML配置文件中设置`finetune_exclude_pretrained_params` - 2. 在命令行中添加-o finetune\_exclude\_pretrained_params对预训练模型进行选择性加载。 - - ```bash - export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \ - -o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \ - finetune_exclude_pretrained_params=['cls_score','bbox_pred'] - ``` - - 详细说明请参考[Transfer Learning](TRANSFER_LEARNING_cn.md) - -#### 提示 - -- `CUDA_VISIBLE_DEVICES` 参数可以指定不同的GPU。例如: `export CUDA_VISIBLE_DEVICES=0,1,2,3`. 
GPU计算规则可以参考 [FAQ](#faq) -- 若本地未找到数据集,将自动下载数据集并保存在`~/.cache/paddle/dataset`中。 -- 预训练模型自动下载并保存在`~/.cache/paddle/weights`中。 -- 模型checkpoints默认保存在`output`中,可通过修改配置文件中save_dir进行配置。 -- RCNN系列模型CPU训练在PaddlePaddle 1.5.1及以下版本暂不支持。 - -### 混合精度训练 - -通过设置 `--fp16` 命令行选项可以启用混合精度训练。目前混合精度训练已经在Faster-FPN, Mask-FPN 及 Yolov3 上进行验证,几乎没有精度损失(小于0.2 mAP)。 - -建议使用多进程方式来进一步加速混合精度训练。示例如下。 - -```bash -python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 tools/train.py --fp16 -c configs/faster_rcnn_r50_fpn_1x.yml -``` - -如果训练过程中loss出现`NaN`,请尝试调节`--loss_scale`选项数值,细节请参看混合精度训练相关的[Nvidia文档](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html#mptrain)。 - -另外,请注意将配置文件中的 `norm_type` 由 `affine_channel` 改为 `bn`。 - - -### 模型评估 - -- 指定权重和数据集路径 - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \ - -d dataset/coco - ``` - - 评估模型可以为本地路径,例如`output/faster_rcnn_r50_1x/model_final/`, 也可以为[MODEL_ZOO](MODEL_ZOO_cn.md)中给出的模型链接。 - -- 通过json文件评估 - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \ - --json_eval \ - --output_eval evaluation/ - ``` - - json文件必须命名为bbox.json或者mask.json,放在`evaluation/`目录下。 - -#### 提示 - -- R-CNN和SSD模型目前暂不支持多GPU评估,将在后续版本支持 - - -### 模型推断 - -- 设置输出路径 && 设置推断阈值 - - ```bash - export CUDA_VISIBLE_DEVICES=0 - python -u tools/infer.py -c configs/faster_rcnn_r50_1x.yml \ - --infer_img=demo/000000570688.jpg \ - --output_dir=infer_output/ \ - --draw_threshold=0.5 \ - -o weights=output/faster_rcnn_r50_1x/model_final - ``` - - - `--draw_threshold` 是个可选参数. 根据 [NMS](https://ieeexplore.ieee.org/document/1699659) 的计算, - 不同阈值会产生不同的结果。如果用户需要对自定义路径的模型进行推断,可以设置`-o weights`指定模型路径。 - -## FAQ - -**Q:** 为什么我使用单GPU训练loss会出现`NaN`?
-**A:** 默认学习率是适配多GPU训练(8x GPU),若使用单GPU训练,须对应调整学习率(例如,除以8)。 -计算规则表如下所示,它们是等价的:
- - -| GPU数 | 学习率 | 最大轮数 | 变化节点 | -| :---------: | :------------: | :-------: | :--------------: | -| 2 | 0.0025 | 720000 | [480000, 640000] | -| 4 | 0.005 | 360000 | [240000, 320000] | -| 8 | 0.01 | 180000 | [120000, 160000] | - - -**Q:** 如何减少GPU显存使用率?
-**A:** 可通过设置环境变量`FLAGS_conv_workspace_size_limit`为较小的值来减少显存消耗,并且不 -会影响训练速度。以Mask-RCNN(R50)为例,设置`export FLAGS_conv_workspace_size_limit=512`, -batch size可以达到每GPU 4 (Tesla V100 16GB)。 - - -**Q:** 如何修改数据预处理?
-**A:** 可在配置文件中设置 `sample_transform`。注意需要在配置文件中加入**完整预处理**, -例如RCNN模型中`DecodeImage`, `NormalizeImage` 和 `Permute`。更多详细描述请参考[配置案例](config_example)。 - - -**Q:** affine_channel和batch norm是什么关系? -**A:** 在RCNN系列模型加载预训练模型初始化,有时候会固定住batch norm的参数, 使用预训练模型中的全局均值和方差,并且batch norm的scale和bias参数不更新,已发布的大多ResNet系列的RCNN模型采用这种方式。这种情况下可以在config中设置norm_type为bn或affine_channel, freeze_norm为true (默认为true),两种方式等价。affine_channel的计算方式为`scale * x + bias`。只不过设置affine_channel时,内部对batch norm的参数自动做了融合。如果训练使用的affine_channel,用保存的模型做初始化,训练其他任务时,既可使用affine_channel, 也可使用batch norm, 参数均可正确加载。 diff --git a/PaddleCV/PaddleDetection/docs/INSTALL.md b/PaddleCV/PaddleDetection/docs/INSTALL.md deleted file mode 100644 index 6a9baf0a4aa7078d27ee654764e9a99cc49071be..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/INSTALL.md +++ /dev/null @@ -1,205 +0,0 @@ -English | [简体中文](INSTALL_cn.md) - -# Installation - ---- -## Table of Contents - -- [Introduction](#introduction) -- [PaddlePaddle](#paddlepaddle) -- [Other Dependencies](#other-dependencies) -- [PaddleDetection](#paddledetection) -- [Datasets](#datasets) - - -## Introduction - -This document covers how to install PaddleDetection, its dependencies -(including PaddlePaddle), together with the COCO and Pascal VOC datasets. - -For general information about PaddleDetection, please see [README.md](../README.md). - - -## PaddlePaddle - -Running PaddleDetection requires PaddlePaddle Fluid v1.5 or later. Please follow the instructions in [installation document](http://www.paddlepaddle.org.cn/). - -Please make sure your PaddlePaddle installation was successful and the version -of your PaddlePaddle is not lower than required. Verify with the following commands. 
- -``` -# To check PaddlePaddle installation in your Python interpreter ->>> import paddle.fluid as fluid ->>> fluid.install_check.run_check() - -# To check PaddlePaddle version -python -c "import paddle; print(paddle.__version__)" -``` - -### Requirements: - -- Python2 or Python3 (Only support Python3 for windows) -- CUDA >= 8.0 -- cuDNN >= 5.0 -- nccl >= 2.1.2 - - -## Other Dependencies - -[COCO-API](https://github.com/cocodataset/cocoapi): - -COCO-API is needed for running. Installation is as follows: - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # if cython is not installed - pip install Cython - # Install into global site-packages - make install - # Alternatively, if you do not have permissions or prefer - # not to install the COCO API into global site-packages - python setup.py install --user - -**Installation of COCO-API in windows:** - - # if cython is not installed - pip install Cython - # Because the origin version of cocoapi does not support windows, another version is used which only supports Python3 - pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI - -## PaddleDetection - -**Clone Paddle models repository:** - -You can clone Paddle models and change working directory to PaddleDetection -with the following commands: - -``` -cd -git clone https://github.com/PaddlePaddle/models -cd models/PaddleCV/PaddleDetection -``` - -**Install Python dependencies:** - -Required python packages are specified in [requirements.txt](../requirements.txt), and can be installed with: - -``` -pip install -r requirements.txt -``` - -**Make sure the tests pass:** - -``` -export PYTHONPATH=`pwd`:$PYTHONPATH -python ppdet/modeling/tests/test_architectures.py -``` - - -## Datasets - -PaddleDetection includes support for [COCO](http://cocodataset.org) and [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) by default, please follow these instructions to set up the dataset. 
- -**Create symlinks for local datasets:** - -Default dataset path in config files is `dataset/coco` and `dataset/voc`, if the -datasets are already available on disk, you can simply create symlinks to -their directories: - -``` -ln -sf /dataset/coco -ln -sf /dataset/voc -``` - -For Pascal VOC dataset, you should create file list by: - -``` -export PYTHONPATH=$PYTHONPATH:. -python dataset/voc/create_list.py -``` - -**Download datasets manually:** - -On the other hand, to download the datasets, run the following commands: - -- COCO - -``` -export PYTHONPATH=$PYTHONPATH:. -python dataset/coco/download_coco.py -``` - -`COCO` dataset with directory structures like this: - - ``` - dataset/coco/ - ├── annotations - │ ├── instances_train2014.json - │ ├── instances_train2017.json - │ ├── instances_val2014.json - │ ├── instances_val2017.json - │ | ... - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - │ | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - │ | ... - | ... - ``` - -- Pascal VOC - -``` -export PYTHONPATH=$PYTHONPATH:. -python dataset/voc/download_voc.py -python dataset/voc/create_list.py -``` - -`Pascal VOC` dataset with directory structure like this: - - ``` - dataset/voc/ - ├── train.txt - ├── val.txt - ├── test.txt - ├── label_list.txt (optional) - ├── VOCdevkit/VOC2007 - │ ├── Annotations - │ ├── 001789.xml - │ | ... - │ ├── JPEGImages - │ ├── 001789.xml - │ | ... - │ ├── ImageSets - │ | ... - ├── VOCdevkit/VOC2012 - │ ├── Annotations - │ ├── 003876.xml - │ | ... - │ ├── JPEGImages - │ ├── 003876.xml - │ | ... - │ ├── ImageSets - │ | ... - | ... - ``` - -**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt` -of Pascal VOC dataset will be read, otherwise, `label_list.txt` is unnecessary and -the default Pascal VOC label list which defined in -[voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used. 
- -**Download datasets automatically:** - -If a training session is started but the dataset is not set up properly (e.g., -not found in `dataset/coco` or `dataset/voc`), PaddleDetection can automatically -download them from [COCO-2017](http://images.cocodataset.org) and -[VOC2012](http://host.robots.ox.ac.uk/pascal/VOC). The decompressed datasets -will be cached in `~/.cache/paddle/dataset/` and can be discovered automatically -subsequently. - - -**NOTE:** For further information on the datasets, please see [DATA.md](DATA.md) diff --git a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md deleted file mode 100644 index f8eee189be251ecbf793689d01007f0275cae99c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md +++ /dev/null @@ -1,199 +0,0 @@ -# 安装文档 - ---- -## 目录 - -- [简介](#简介) -- [PaddlePaddle](#paddlepaddle) -- [其他依赖安装](#其他依赖安装) -- [PaddleDetection](#paddledetection) -- [数据集](#数据集) - - -## 简介 - -这份文档介绍了如何安装PaddleDetection及其依赖项(包括PaddlePaddle),以及COCO和Pascal VOC数据集。 - -PaddleDetection的相关信息,请参考[README.md](../README.md)。 
- - -## PaddlePaddle - - -运行PaddleDetection需要PaddlePaddle Fluid v.1.5及更高版本。请按照[安装文档](http://www.paddlepaddle.org.cn/)中的说明进行操作。 - -请确保您的PaddlePaddle安装成功并且版本不低于需求版本。使用以下命令进行验证。 - -``` -# 在您的Python解释器中确认PaddlePaddle安装成功 ->>> import paddle.fluid as fluid ->>> fluid.install_check.run_check() - -# 确认PaddlePaddle版本 -python -c "import paddle; print(paddle.__version__)" -``` - -### 环境需求: - -- Python2 or Python3 (windows系统仅支持Python3) -- CUDA >= 8.0 -- cuDNN >= 5.0 -- nccl >= 2.1.2 - - -## 其他依赖安装 - -[COCO-API](https://github.com/cocodataset/cocoapi): - -运行需要COCO-API,安装方式如下: - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # 若Cython未安装,请安装Cython - pip install Cython - # 安装至全局site-packages - make install - # 若您没有权限或更倾向不安装至全局site-packages - python setup.py install --user - -**windows用户安装COCO-API方式:** - - # 若Cython未安装,请安装Cython - pip install Cython - # 由于原版cocoapi不支持windows,采用第三方实现版本,该版本仅支持Python3 - pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI - -## PaddleDetection - -**克隆Paddle models模型库:** - -您可以通过以下命令克隆Paddle models模型库并切换工作目录至PaddleDetection: - -``` -cd -git clone https://github.com/PaddlePaddle/models -cd models/PaddleCV/PaddleDetection -``` - -**安装Python依赖库:** - -Python依赖库在[requirements.txt](../requirements.txt)中给出,可通过如下命令安装: - -``` -pip install -r requirements.txt -``` - -**确认测试通过:** - -``` -export PYTHONPATH=`pwd`:$PYTHONPATH -python ppdet/modeling/tests/test_architectures.py -``` - - -## 数据集 - - -PaddleDetection默认支持[COCO](http://cocodataset.org)和[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/), -请按照如下步骤设置数据集。 - -**为本地数据集创建软链接:** - - -配置文件中默认的数据集路径是`dataset/coco`和`dataset/voc`,如果您本地磁盘上已有数据集, -只需创建软链接至数据集目录: - -``` -ln -sf /dataset/coco -ln -sf /dataset/voc -``` - -对于Pascal VOC数据集,需通过如下命令创建文件列表: - -``` -export PYTHONPATH=$PYTHONPATH:. -python dataset/voc/create_list.py -``` - -**手动下载数据集:** - -若您本地没有数据集,可通过如下命令下载: - -- COCO - -``` -export PYTHONPATH=$PYTHONPATH:. 
-python dataset/coco/download_coco.py -``` - -`COCO` 数据集目录结构如下: - - ``` - dataset/coco/ - ├── annotations - │ ├── instances_train2014.json - │ ├── instances_train2017.json - │ ├── instances_val2014.json - │ ├── instances_val2017.json - │ | ... - ├── train2017 - │ ├── 000000000009.jpg - │ ├── 000000580008.jpg - │ | ... - ├── val2017 - │ ├── 000000000139.jpg - │ ├── 000000000285.jpg - │ | ... - | ... - ``` - -- Pascal VOC - -``` -export PYTHONPATH=$PYTHONPATH:. -python dataset/voc/download_voc.py -python dataset/voc/create_list.py -``` - -`Pascal VOC` 数据集目录结构如下: - - ``` - dataset/voc/ - ├── train.txt - ├── val.txt - ├── test.txt - ├── label_list.txt (optional) - ├── VOCdevkit/VOC2007 - │ ├── Annotations - │ ├── 001789.xml - │ | ... - │ ├── JPEGImages - │ ├── 001789.xml - │ | ... - │ ├── ImageSets - │ | ... - ├── VOCdevkit/VOC2012 - │ ├── Annotations - │ ├── 003876.xml - │ | ... - │ ├── JPEGImages - │ ├── 003876.xml - │ | ... - │ ├── ImageSets - │ | ... - | ... - ``` - -**说明:** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt` -中读取类别列表,反之则可以没有`label_list.txt`文件,检测库会使用Pascal VOC数据集的默 -认类别列表,默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py) - -**自动下载数据集:** - -若您在数据集未成功设置(例如,在`dataset/coco`或`dataset/voc`中找不到)的情况下开始运行, -PaddleDetection将自动从[COCO-2017](http://images.cocodataset.org)或 -[VOC2012](http://host.robots.ox.ac.uk/pascal/VOC)下载,解压后的数据集将被保存在 -`〜/.cache/paddle/dataset/`目录下,下次运行时,也可自动从该目录发现数据集。 - - -**说明:** 更多有关数据集的介绍,请参考[DATA.md](DATA_cn.md) diff --git a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md b/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md deleted file mode 100644 index d6042ada1293ea77a1670871bbff1d6f94f8a163..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/MODEL_ZOO.md +++ /dev/null @@ -1,165 +0,0 @@ -English | [简体中文](MODEL_ZOO_cn.md) - -# Model Zoo and Benchmark -## Environment - -- Python 2.7.1 -- PaddlePaddle >=1.5 -- CUDA 9.0 -- cuDNN >=7.4 -- NCCL 2.1.2 - -## Common settings - -- All models below were trained 
on `coco_2017_train`, and tested on `coco_2017_val`. -- Batch Normalization layers in backbones are replaced by Affine Channel layers. -- Unless otherwise noted, all ResNet backbones adopt the [ResNet-B](https://arxiv.org/pdf/1812.01187) variant. -- For RCNN and RetinaNet models, only horizontal flipping data augmentation was used in the training phase and no augmentations were used in the testing phase. -- **Inf time (fps)**: the inference time is measured in fps (image/s) on a single GPU (Tesla V100) with cuDNN 7.5 by running 'tools/eval.py' on the whole validation set, including data loading, network forward pass and post-processing. The batch size is 1. - - -## Training Schedules - -- We adopt exactly the same training schedules as [Detectron](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#training-schedules). -- 1x indicates the schedule starts at a LR of 0.02 and is decreased by a factor of 10 after 60k and 80k iterations and eventually terminates at 90k iterations for minibatch size 16. For batch size 8, LR is decreased to 0.01, total training iterations are doubled, and the decay milestones are scaled by 2. -- 2x schedule is twice as long as 1x, with the LR milestones scaled accordingly. - -## ImageNet Pretrained Models - -The backbone models pretrained on ImageNet are available. All backbone models are pretrained on the standard ImageNet-1k dataset and can be downloaded [here](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#supported-models-and-performances). - -- **Notes:** The ResNet50 model was trained with a cosine LR decay schedule and can be downloaded [here](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar). 
- -## Baselines - -### Faster & Mask R-CNN - -| Backbone | Type | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP | Download | -| :---------------------- | :------------- | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: | -| ResNet50 | Faster | 1 | 1x | 12.747 | 35.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar) | -| ResNet50 | Faster | 1 | 2x | 12.686 | 37.1 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_2x.tar) | -| ResNet50 | Mask | 1 | 1x | 11.615 | 36.5 | 32.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar) | -| ResNet50 | Mask | 1 | 2x | 11.494 | 38.2 | 33.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_2x.tar) | -| ResNet50-vd | Faster | 1 | 1x | 12.575 | 36.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar) | -| ResNet50-FPN | Faster | 2 | 1x | 22.273 | 37.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Faster | 2 | 2x | 22.297 | 37.7 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar) | -| ResNet50-FPN | Mask | 1 | 1x | 15.184 | 37.9 | 34.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Mask | 1 | 2x | 15.881 | 38.7 | 34.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar) | -| ResNet50-FPN | Cascade Faster | 2 | 1x | 17.507 | 40.9 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Cascade Mask | 1 | 1x | - | 41.3 | 35.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Faster | 2 | 2x | 21.847 | 38.9 | - | 
[model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar) | -| ResNet50-vd-FPN | Mask | 1 | 2x | 15.825 | 39.8 | 35.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar) | -| ResNet101 | Faster | 1 | 1x | 9.316 | 38.3 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar) | -| ResNet101-FPN | Faster | 1 | 1x | 17.297 | 38.7 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar) | -| ResNet101-FPN | Faster | 1 | 2x | 17.246 | 39.1 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar) | -| ResNet101-FPN | Mask | 1 | 1x | 12.983 | 39.5 | 35.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar) | -| ResNet101-vd-FPN | Faster | 1 | 1x | 17.011 | 40.5 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_1x.tar) | -| ResNet101-vd-FPN | Faster | 1 | 2x | 16.934 | 40.8 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Mask | 1 | 1x | 13.105 | 41.4 | 36.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-64x4d-FPN | Faster | 1 | 1x | 8.815 | 42.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_x101_vd_64x4d_fpn_1x.tar) | -| ResNeXt101-vd-64x4d-FPN | Faster | 1 | 2x | 8.809 | 41.7 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_x101_vd_64x4d_fpn_2x.tar) | -| ResNeXt101-vd-64x4d-FPN | Mask | 1 | 1x | 7.689 | 42.9 | 37.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_x101_vd_64x4d_fpn_1x.tar) | -| ResNeXt101-vd-64x4d-FPN | Mask | 1 | 2x | 7.859 | 42.6 | 37.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_x101_vd_64x4d_fpn_2x.tar) | -| SENet154-vd-FPN | Faster | 1 | 1.44x | 3.408 | 
42.9 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | -| SENet154-vd-FPN | Mask | 1 | 1.44x | 3.233 | 44.0 | 38.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | - -### Deformable ConvNets v2 - -| Backbone | Type | Conv | Image/gpu | Lr schd | Inf time (fps) | Box AP | Mask AP | Download | -| :---------------------- | :------------- | :---: | :-------: | :-----: | :------------: | :----: | :-----: | :----------------------------------------------------------: | -| ResNet50-FPN | Faster | c3-c5 | 2 | 1x | 19.978 | 41.0 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Faster | c3-c5 | 2 | 2x | 19.222 | 42.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Faster | c3-c5 | 2 | 1x | 14.477 | 44.1 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-64x4d-FPN | Faster | c3-c5 | 1 | 1x | 7.209 | 45.2 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | 14.53 | 41.9 | 37.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 14.832 | 42.9 | 38.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 11.546 | 44.6 | 39.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-64x4d-FPN | Mask | c3-c5 | 1 | 1x | 6.45 | 46.2 | 40.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 44.2 | - | 
[model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 46.4 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 47.3 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| SENet154-vd-FPN | Cascade Mask | c3-c5 | 1 | 1.44x | - | 51.9 | 43.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.tar) | - -#### Notes: -- Deformable ConvNets v2(dcn_v2) reference from [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). -- `c3-c5` means adding `dcn` in resnet stage 3 to 5. -- Detailed configuration file in [configs/dcn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/dcn) - -### Group Normalization -| Backbone | Type | Image/gpu | Lr schd | Box AP | Mask AP | Download | -| :------------------- | :------------- | :-----: | :-----: | :----: | :-----: | :----------------------------------------------------------: | -| ResNet50-FPN | Faster | 2 | 2x | 39.7 | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_gn_2x.tar) | -| ResNet50-FPN | Mask | 1 | 2x | 40.1 | 35.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_gn_2x.tar) | - -#### Notes: -- Group Normalization reference from [Group Normalization](https://arxiv.org/abs/1803.08494). 
-- Detailed configuration file in [configs/gn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/gn) - -### Yolo v3 - -| Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | -| :----------- | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | -| DarkNet53 | 608 | 8 | 270e | 45.571 | 38.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| DarkNet53 | 416 | 8 | 270e | - | 37.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| DarkNet53 | 320 | 8 | 270e | - | 34.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| MobileNet-V1 | 608 | 8 | 270e | 78.302 | 29.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| MobileNet-V1 | 416 | 8 | 270e | - | 29.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| MobileNet-V1 | 320 | 8 | 270e | - | 27.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| ResNet34 | 608 | 8 | 270e | 63.356 | 36.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | -| ResNet34 | 416 | 8 | 270e | - | 34.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | -| ResNet34 | 320 | 8 | 270e | - | 31.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | - - -### Yolo v3 on Pascal VOC - -| Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | -| :----------- | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | -| DarkNet53 | 608 | 8 | 270e | 54.977 | 83.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| DarkNet53 | 416 | 8 | 270e | - | 83.6 | 
[model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| DarkNet53 | 320 | 8 | 270e | - | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| MobileNet-V1 | 608 | 8 | 270e | 104.291 | 76.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| MobileNet-V1 | 416 | 8 | 270e | - | 76.7 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| MobileNet-V1 | 320 | 8 | 270e | - | 75.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| ResNet34 | 608 | 8 | 270e | 82.247 | 82.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | -| ResNet34 | 416 | 8 | 270e | - | 81.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | -| ResNet34 | 320 | 8 | 270e | - | 80.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | - -**Notes:** Yolo v3 is trained in 8 GPU with total batch size as 64 and trained 270 epoches. Yolo v3 training data augmentations: mixup, -randomly color distortion, randomly cropping, randomly expansion, randomly interpolation method, randomly flippling. Yolo v3 used randomly -reshaped minibatch in training, inferences can be performed on different image sizes with the same model weights, and we provided evaluation -results of image size 608/416/320 above. 
- -### RetinaNet - -| Backbone | Image/gpu | Lr schd | Box AP | Download | -| :---------------: | :-----: | :-----: | :----: | :-------: | -| ResNet50-FPN | 2 | 1x | 36.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | -| ResNet101-FPN | 2 | 1x | 37.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | -| ResNeXt101-vd-FPN | 1 | 1x | 40.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_x101_vd_64x4d_fpn_1x.tar) | - -**Notes:** In RetinaNet, the base LR is changed to 0.01 for minibatch size 16. - -### SSD - -| Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | -| :------: | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | -| VGG16 | 300 | 8 | 40w | 81.613 | 25.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300.tar) | -| VGG16 | 512 | 8 | 40w | 46.007 | 29.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512.tar) | - -**Notes:** VGG-SSD is trained in 4 GPU with total batch size as 32 and trained 400000 iters. - -### SSD on Pascal VOC - -| Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | -| :----------- | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | -| MobileNet v1 | 300 | 32 | 120e | 159.543 | 73.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) | -| VGG16 | 300 | 8 | 240e | 117.279 | 77.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300_voc.tar) | -| VGG16 | 512 | 8 | 240e | 65.975 | 80.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512_voc.tar) | - -**NOTE**: MobileNet-SSD is trained in 2 GPU with totoal batch size as 64 and trained 120 epoches. VGG-SSD is trained in 4 GPU with total batch size as 32 and trained 240 epoches. 
SSD training data augmentations: randomly color distortion, -randomly cropping, randomly expansion, randomly flipping. - - -## Face Detection - -Please refer [face detection models](../configs/face_detection) for details. diff --git a/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md b/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md deleted file mode 100644 index 918a9cb249d105d9ed7cd0411ede8c5e8ce0c3d5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/MODEL_ZOO_cn.md +++ /dev/null @@ -1,156 +0,0 @@ -# 模型库和基线 - -## 测试环境 - -- Python 2.7.1 -- PaddlePaddle >=1.5 -- CUDA 9.0 -- cuDNN >=7.4 -- NCCL 2.1.2 - -## 通用设置 - -- 所有模型均在COCO17数据集中训练和测试。 -- 除非特殊说明,所有ResNet骨干网络采用[ResNet-B](https://arxiv.org/pdf/1812.01187)结构。 -- 对于RCNN和RetinaNet系列模型,训练阶段仅使用水平翻转作为数据增强,测试阶段不使用数据增强。 -- **推理时间(fps)**: 推理时间是在一张Tesla V100的GPU上通过'tools/eval.py'测试所有验证集得到,单位是fps(图片数/秒), cuDNN版本是7.5,包括数据加载、网络前向执行和后处理, batch size是1。 - -## 训练策略 - -- 我们采用和[Detectron](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#training-schedules)相同的训练策略。 -- 1x 策略表示:在总batch size为16时,初始学习率为0.02,在6万轮和8万轮后学习率分别下降10倍,最终训练9万轮。在总batch size为8时,初始学习率为0.01,在12万轮和16万轮后学习率分别下降10倍,最终训练18万轮。 -- 2x 策略为1x策略的两倍,同时学习率调整位置也为1x的两倍。 - -## ImageNet预训练模型 - -Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型均通过标准的Imagenet-1k数据集训练得到。[下载链接](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#supported-models-and-performances) - -- 注:ResNet50模型通过余弦学习率调整策略训练得到。[下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar) - -## 基线 - -### Faster & Mask R-CNN - -| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | -| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | -| ResNet50 | Faster | 1 | 1x | 12.747 | 35.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar) | -| ResNet50 | Faster | 1 | 2x | 12.686 | 37.1 | - | 
[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_2x.tar) | -| ResNet50 | Mask | 1 | 1x | 11.615 | 36.5 | 32.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar) | -| ResNet50 | Mask | 1 | 2x | 11.494 | 38.2 | 33.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_2x.tar) | -| ResNet50-vd | Faster | 1 | 1x | 12.575 | 36.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar) | -| ResNet50-FPN | Faster | 2 | 1x | 22.273 | 37.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Faster | 2 | 2x | 22.297 | 37.7 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar) | -| ResNet50-FPN | Mask | 1 | 1x | 15.184 | 37.9 | 34.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Mask | 1 | 2x | 15.881 | 38.7 | 34.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar) | -| ResNet50-FPN | Cascade Faster | 2 | 1x | 17.507 | 40.9 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_r50_fpn_1x.tar) | -| ResNet50-FPN | Cascade Mask | 1 | 1x | - | 41.3 | 35.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Faster | 2 | 2x | 21.847 | 38.9 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar) | -| ResNet50-vd-FPN | Mask | 1 | 2x | 15.825 | 39.8 | 35.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar) | -| ResNet101 | Faster | 1 | 1x | 9.316 | 38.3 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar) | -| ResNet101-FPN | Faster | 1 | 1x | 17.297 | 38.7 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar) | -| ResNet101-FPN | Faster 
| 1 | 2x | 17.246 | 39.1 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar) | -| ResNet101-FPN | Mask | 1 | 1x | 12.983 | 39.5 | 35.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar) | -| ResNet101-vd-FPN | Faster | 1 | 1x | 17.011 | 40.5 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_1x.tar) | -| ResNet101-vd-FPN | Faster | 1 | 2x | 16.934 | 40.8 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Mask | 1 | 1x | 13.105 | 41.4 | 36.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Faster | 1 | 1x | 8.815 | 42.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_x101_vd_64x4d_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Faster | 1 | 2x | 8.809 | 41.7 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_x101_vd_64x4d_fpn_2x.tar) | -| ResNeXt101-vd-FPN | Mask | 1 | 1x | 7.689 | 42.9 | 37.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_x101_vd_64x4d_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Mask | 1 | 2x | 7.859 | 42.6 | 37.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_x101_vd_64x4d_fpn_2x.tar) | -| SENet154-vd-FPN | Faster | 1 | 1.44x | 3.408 | 42.9 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_se154_vd_fpn_s1x.tar) | -| SENet154-vd-FPN | Mask | 1 | 1.44x | 3.233 | 44.0 | 38.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_se154_vd_fpn_s1x.tar) | - -### Deformable 卷积网络v2 - -| 骨架网络 | 网络类型 | 卷积 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | -| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | -| ResNet50-FPN | Faster | c3-c5 | 2 | 1x | 19.978 | 
41.0 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Faster | c3-c5 | 2 | 2x | 19.222 | 42.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Faster | c3-c5 | 2 | 1x | 14.477 | 44.1 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Faster | c3-c5 | 1 | 1x | 7.209 | 45.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| ResNet50-FPN | Mask | c3-c5 | 1 | 1x | 14.53 | 41.9 | 37.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet50-vd-FPN | Mask | c3-c5 | 1 | 2x | 14.832 | 42.9 | 38.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r50_vd_fpn_2x.tar) | -| ResNet101-vd-FPN | Mask | c3-c5 | 1 | 1x | 11.546 | 44.6 | 39.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Mask | c3-c5 | 1 | 1x | 6.45 | 46.2 | 40.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| ResNet50-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 44.2 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r50_fpn_1x.tar) | -| ResNet101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 46.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_r101_vd_fpn_1x.tar) | -| ResNeXt101-vd-FPN | Cascade Faster | c3-c5 | 2 | 1x | - | 47.3 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.tar) | -| SENet154-vd-FPN | Cascade Mask | c3-c5 | 1 | 1.44x | - | 51.9 | 43.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.tar) | - -#### 注意事项: -- 
Deformable卷积网络v2(dcn_v2)参考自论文[Deformable ConvNets v2](https://arxiv.org/abs/1811.11168). -- `c3-c5`意思是在resnet模块的3到5阶段增加`dcn`. -- 详细的配置文件在[configs/dcn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/dcn) - -### Group Normalization -| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 | Box AP | Mask AP | 下载 | -| :------------------- | :------------- |:--------: | :-----: | :----: | :-----: | :----------------------------------------------------------: | -| ResNet50-FPN | Faster | 2 | 2x | 39.7 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_gn_2x.tar) | -| ResNet50-FPN | Mask | 1 | 2x | 40.1 | 35.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_gn_2x.tar) | - -#### 注意事项: -- Group Normalization参考论文[Group Normalization](https://arxiv.org/abs/1803.08494). -- 详细的配置文件在[configs/gn](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/configs/gn) - -### Yolo v3 - -| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | 下载 | -| :----------- | :--: | :-----: | :-----: |:------------: |:----: | :-------: | -| DarkNet53 | 608 | 8 | 270e | 45.571 | 38.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| DarkNet53 | 416 | 8 | 270e | - | 37.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| DarkNet53 | 320 | 8 | 270e | - | 34.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| MobileNet-V1 | 608 | 8 | 270e | 78.302 | 29.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| MobileNet-V1 | 416 | 8 | 270e | - | 29.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| MobileNet-V1 | 320 | 8 | 270e | - | 27.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) | -| ResNet34 | 608 | 8 | 270e | 63.356 | 36.2 | 
[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | -| ResNet34 | 416 | 8 | 270e | - | 34.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | -| ResNet34 | 320 | 8 | 270e | - | 31.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar) | - -### Yolo v3 基于Pascal VOC数据集 - -| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | 下载 | -| :----------- | :--: | :-----: | :-----: |:------------: |:----: | :-------: | -| DarkNet53 | 608 | 8 | 270e | 54.977 | 83.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| DarkNet53 | 416 | 8 | 270e | - | 83.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| DarkNet53 | 320 | 8 | 270e | - | 82.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet_voc.tar) | -| MobileNet-V1 | 608 | 8 | 270e | 104.291 | 76.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| MobileNet-V1 | 416 | 8 | 270e | - | 76.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| MobileNet-V1 | 320 | 8 | 270e | - | 75.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar) | -| ResNet34 | 608 | 8 | 270e | 82.247 | 82.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | -| ResNet34 | 416 | 8 | 270e | - | 81.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | -| ResNet34 | 320 | 8 | 270e | - | 80.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34_voc.tar) | - -**注意事项:** Yolo v3在8卡,总batch size为64下训练270轮。数据增强包括:mixup, 随机颜色失真,随机剪裁,随机扩张,随机插值法,随机翻转。Yolo v3在训练阶段对minibatch采用随机reshape,可以采用相同的模型测试不同尺寸图片,我们分别提供了尺寸为608/416/320大小的测试结果。 - -### RetinaNet - -| 骨架网络 | 每张GPU图片个数 | 学习率策略 | Box AP | 下载 | -| :---------------: | :-----: | :-----: | :----: | :-------: | -| ResNet50-FPN | 2 | 1x | 36.0 | 
[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r50_fpn_1x.tar) | -| ResNet101-FPN | 2 | 1x | 37.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_r101_fpn_1x.tar) | -| ResNeXt101-vd-FPN | 1 | 1x | 40.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/retinanet_x101_vd_64x4d_fpn_1x.tar) | - -**注意事项:** RetinaNet系列模型中,在总batch size为16下情况下,初始学习率改为0.01。 - -### SSD - -| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略|推理时间(fps) | Box AP | 下载 | -| :----------: | :--: | :-----: | :-----: |:------------: |:----: | :-------: | -| VGG16 | 300 | 8 | 40万 | 81.613 | 25.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300.tar) | -| VGG16 | 512 | 8 | 40万 | 46.007 | 29.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512.tar) | - -**注意事项:** VGG-SSD在总batch size为32下训练40万轮。 - -### SSD 基于Pascal VOC数据集 - -| 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | 下载 | -| :----------- | :--: | :-----: | :-----: | :------------: |:----: | :-------: | -| MobileNet v1 | 300 | 32 | 120e | 159.543 | 73.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) | -| VGG16 | 300 | 8 | 240e | 117.279 | 77.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_300_voc.tar) | -| VGG16 | 512 | 8 | 240e | 65.975 | 80.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssd_vgg16_512_voc.tar) | - -**注意事项:** MobileNet-SSD在2卡,总batch size为64下训练120周期。VGG-SSD在总batch size为32下训练240周期。数据增强包括:随机颜色失真,随机剪裁,随机扩张,随机翻转。 - -## 人脸检测 - -详细请参考[人脸检测模型](../configs/face_detection). 
diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md deleted file mode 100644 index 18b8602328075a2734dc139c16215e95a6025a3d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md +++ /dev/null @@ -1,67 +0,0 @@ -English | [简体中文](QUICK_STARTED_cn.md) - -# Quick Start - -This tutorial fine-tunes a pretrained detection model on a tiny dataset so that users can get a model and learn PaddleDetection quickly. The model can be trained in around 20min with good performance. - -## Data Preparation - -Dataset refers to [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), which contains 240 images in train dataset and 60 images in test dataset. Data categories are apple, orange and banana. Download [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress the dataset after download, script for data preparation is located at [download_fruit.py](../dataset/fruit/download_fruit.py). Command is as follows: - -```bash -export PYTHONPATH=$PYTHONPATH:. -python dataset/fruit/download_fruit.py -``` - -- **Note: before starting, run the following command and specify the GPU** - -```bash -export PYTHONPATH=$PYTHONPATH:. -export CUDA_VISIBLE_DEVICES=0 -``` - -Training: - -```bash -python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \ - --use_tb=True \ - --tb_log_dir=tb_fruit_dir/scalar \ - --eval -``` - -Use `yolov3_mobilenet_v1` to fine-tune the model from COCO dataset. Meanwhile, loss and mAP can be observed on tensorboard. - -```bash -tensorboard --logdir tb_fruit_dir/scalar/ --host --port -``` - -Result on tensorboard is shown below: - -
- -
- -Model can be downloaded [here](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar) - -Evaluation: - -```bash -python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml -``` - -Inference: - -```bash -python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar \ - --infer_img=demo/orange_71.jpg -``` - -Inference images are shown below: - -

- - -

- -For detailed information on training and evaluation, please refer to [GETTING_STARTED.md](GETTING_STARTED.md). diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md deleted file mode 100644 index c11f041ba4405ebbf6c60365bbe937df55e6374d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md +++ /dev/null @@ -1,67 +0,0 @@ -[English](QUICK_STARTED.md) | 简体中文 - -# 快速开始 - -为了使得用户能够在很短的时间内快速产出模型,掌握PaddleDetection的使用方式,这篇教程通过一个预训练检测模型对小数据集进行finetune。在P40上单卡大约20min即可产出一个效果不错的模型。 - -## 数据准备 - -数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection),其中训练数据集240张图片,测试数据集60张图片,数据类别为3类:苹果,橘子,香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download_fruit.py](../dataset/fruit/download_fruit.py)。下载数据方式如下: - -```bash -export PYTHONPATH=$PYTHONPATH:. -python dataset/fruit/download_fruit.py -``` - -- **注:在开始前,运行如下命令并指定GPU** - -```bash -export PYTHONPATH=$PYTHONPATH:. -export CUDA_VISIBLE_DEVICES=0 -``` - -训练命令如下: - -```bash -python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \ - --use_tb=True \ - --tb_log_dir=tb_fruit_dir/scalar \ - --eval -``` - -训练使用`yolov3_mobilenet_v1`基于COCO数据集训练好的模型进行finetune。训练期间可以通过tensorboard实时观察loss和精度值,启动命令如下: - -```bash -tensorboard --logdir tb_fruit_dir/scalar/ --host --port -``` - -tensorboard结果显示如下: - -
- -
- -训练模型[下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar) - -评估命令如下: - -```bash -python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml -``` - -预测命令如下 - -```bash -python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml \ - -o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar \ - --infer_img=demo/orange_71.jpg -``` - -预测图片如下: - -

- - -

- -更多训练及评估流程,请参考[GETTING_STARTED_cn.md](GETTING_STARTED_cn.md). diff --git a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md deleted file mode 100644 index 0bc0377acb749ee896050660ba122a3a77ca20b7..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING.md +++ /dev/null @@ -1,39 +0,0 @@ -English | [简体中文](TRANSFER_LEARNING_cn.md) - -# Transfer Learning - -Transfer learning aims at learning new knowledge from existing knowledge. For example, take a model pretrained on ImageNet to initialize detection models, or take a model pretrained on the COCO dataset to initialize detection models trained on the PascalVOC dataset. - -In transfer learning, if a different dataset with a different number of classes is used, loading the parameters related to the number of classes fails because of a dimension mismatch; on the other hand, if a more complicated model is used, the open-source model structure needs to be modified and its parameters loaded selectively. Thus, PaddleDetection should be able to designate parameter fields and skip loading the parameters that match those fields. - -## Transfer Learning in PaddleDetection - -In transfer learning, the pretrained model needs to be loaded selectively. The following two methods can be used: - -1. Set `finetune_exclude_pretrained_params` in YAML configuration files. Please refer to [configure file](../configs/yolov3_mobilenet_v1_fruit.yml#L15) -2. Set -o finetune_exclude_pretrained_params in command line. For example: - -```bash -export PYTHONPATH=$PYTHONPATH:. -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \ - -o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \ - finetune_exclude_pretrained_params=['cls_score','bbox_pred'] -``` - -* Note: - -1. The path in pretrain\_weights is the open-source model link of faster RCNN from COCO dataset. 
For the full list of model links, please refer to [MODEL_ZOO](MODEL_ZOO.md) -2. The parameter fields are set in finetune\_exclude\_pretrained\_params. If the name of a parameter matches a field (wildcard matching), the parameter will be ignored when loading. - -If users want to fine-tune on their own dataset and keep the model structure unchanged, they only need to ignore the parameters related to the number of classes. PaddleDetection lists the ignored parameter fields corresponding to each model type. The table is shown below:
- -| model type | ignored parameter fields | -| :----------------: | :---------------------------------------: | -| Faster RCNN | cls\_score, bbox\_pred | -| Cascade RCNN | cls\_score, bbox\_pred | -| Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits | -| Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits | -| RetinaNet | retnet\_cls\_pred\_fpn | -| SSD | ^conv2d\_ | -| YOLOv3 | yolo\_output | diff --git a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md b/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md deleted file mode 100644 index a54210d0aa9ef32096620e1830d49a2b2430b189..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/TRANSFER_LEARNING_cn.md +++ /dev/null @@ -1,37 +0,0 @@ -# 迁移学习 - -迁移学习为利用已有知识,对新知识进行学习。例如利用ImageNet分类预训练模型做初始化来训练检测模型,利用在COCO数据集上的检测模型做初始化来训练基于PascalVOC数据集的检测模型。 - -在进行迁移学习时,由于会使用不同的数据集,数据类别数与COCO/VOC数据类别不同,导致在加载PaddlePaddle开源模型时,与类别数相关的权重(例如分类模块的fc层)会出现维度不匹配的问题;另外,如果需要结构更加复杂的模型,需要对已有开源模型结构进行调整,对应权重也需要选择性加载。因此,需要检测库能够指定参数字段,在加载模型时不加载匹配的权重。 - -## PaddleDetection进行迁移学习 - -在迁移学习中,对预训练模型进行选择性加载,可通过如下两种方式实现: - -1. 在 YAML 配置文件中通过设置`finetune_exclude_pretrained_params`字段。可参考[配置文件](../configs/yolov3_mobilenet_v1_fruit.yml#L15) -2. 在 train.py的启动参数中设置 -o finetune_exclude_pretrained_params。例如: - -```bash -export PYTHONPATH=$PYTHONPATH:. -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \ - -o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \ - finetune_exclude_pretrained_params=['cls_score','bbox_pred'] -``` - -* 说明: - -1. pretrain\_weights的路径为COCO数据集上开源的faster RCNN模型链接,完整模型链接可参考[MODEL_ZOO](MODEL_ZOO_cn.md) -2. finetune\_exclude\_pretrained\_params中设置参数字段,如果参数名能够匹配以上参数字段(通配符匹配方式),则在模型加载时忽略该参数。 - -如果用户需要利用自己的数据进行finetune,模型结构不变,只需要忽略与类别数相关的参数。PaddleDetection给出了不同模型类型所对应的忽略参数字段。如下表所示:
- -| 模型类型 | 忽略参数字段 | -| :----------------: | :---------------------------------------: | -| Faster RCNN | cls\_score, bbox\_pred | -| Cascade RCNN | cls\_score, bbox\_pred | -| Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits | -| Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits | -| RetinaNet | retnet\_cls\_pred\_fpn | -| SSD | ^conv2d\_ | -| YOLOv3 | yolo\_output | diff --git a/PaddleCV/PaddleDetection/docs/config_example/mask_rcnn_r50_fpn_1x.yml b/PaddleCV/PaddleDetection/docs/config_example/mask_rcnn_r50_fpn_1x.yml deleted file mode 100644 index 50d38777a565e8b58e3d8dd4d40279e733bc3b1a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/config_example/mask_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,498 +0,0 @@ -# Architecture of detection, which is also the prefix of data feed module -architecture: MaskRCNN - -# Data feed module -train_feed: MaskRCNNTrainFeed -eval_feed: MaskRCNNEvalFeed -test_feed: MaskRCNNTestFeed - -# Use GPU or CPU, true by default -use_gpu: true - -# Maximum number of iteration. -# In rcnn models, max_iters is 180000 if lr schedule is 1x and batch_size is 1. -max_iters: 180000 - -# Snapshot period. If training and test at same time, evaluate model at each snapshot_iter. 10000 by default. -snapshot_iter: 10000 - -# Smooth the log output in specified iterations, 20 by default. -log_smooth_window: 20 - -# The number of iteration interval to display in training log. -log_iter: 20 - -# The directory to save models. -save_dir: output - -# The path of pretrained weights. If url is provided, it will download the pretrain_weights and decompress automatically. -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar - -# Evaluation method, COCO and VOC are available. -metric: COCO - -# The path of final model for evaluation and test. 
-weights: output/mask_rcnn_r50_fpn_1x/model_final/ - -# Number of classes, 81 for COCO and 21 for VOC -num_classes: 81 - -# Mask RCNN architecture, see https://arxiv.org/abs/1703.06870 -MaskRCNN: - backbone: ResNet - fpn: FPN - roi_extractor: FPNRoIAlign - rpn_head: FPNRPNHead - bbox_assigner: BBoxAssigner - bbox_head: BBoxHead - mask_assigner: MaskAssigner - mask_head: MaskHead - rpn_only: false - -# Backbone module -ResNet: - # Index of stages using deformable conv v2, [] by default - dcn_v2_stages: [] - # ResNet depth, 50 by default - depth: 50 - # Stage index of returned feature map, [2,3,4,5] by default - feature_maps: - - 2 - - 3 - - 4 - - 5 - # Stage Index of backbone to freeze, 2 by default - freeze_at: 2 - # Whether freeze normalization layers, true by default - freeze_norm: true - # Weight decay for normalization layer weights, 0. by default - norm_decay: 0.0 - # Normalization type, bn/sync_bn/affine_channel, affine_channel by default - norm_type: affine_channel - # ResNet variant, supports 'a', 'b', 'c', 'd' currently, b by default - variant: b - -# FPN module -FPN: - # Whether has extra conv in higher levels, false by default - has_extra_convs: false - # Highest level of the backbone feature map to use, 6 by default - max_level: 6 - # Lowest level of the backbone feature map to use, 6 by default - min_level: 2 - # FPN normalization type, bn/sync_bn/affine_channel, null by default - norm_type: null - # Number of feature channels, 256 by default - num_chan: 256 - # Feature map scaling factors, [0.03125, 0.0625, 0.125, 0.25] by default - spatial_scale: - - 0.03125 - - 0.0625 - - 0.125 - - 0.25 - -# RPN module, if use non-FPN architecture, use RPNHead instead -# Extract proposals according to anchors and assign box targets and -# score targets to selected proposals to compute RPN loss. For FPN -# architecture, RPN is computed from each levels and collect proposals -# together. 
-FPNRPNHead: - # fluid.layers.anchor_generator - # Generate anchors for RCNN models. Each position of input produces - # N anchors. N = anchor_sizes * aspect_ratios. In FPNRPNHead, aspect_ratios - # is provided and anchor_sizes depends on FPN levels and anchor_start_size. - anchor_generator: - aspect_ratios: - - 0.5 - - 1.0 - - 2.0 - variance: - - 1.0 - - 1.0 - - 1.0 - - 1.0 - # fluid.layers.rpn_target_assign - # Assign classification and regression targets to each anchor according - # to Intersection-over-Union(IoU) overlap between anchors and ground - # truth boxes. The classification targets is binary class labels. the - # positive labels are two kinds of anchors: the anchors with the highest - # IoU overlap with a ground-truth box, or an anchor that has an IoU overlap - # higher than rpn_positive_overlap with any ground-truth box. - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_negative_overlap: 0.3 - rpn_positive_overlap: 0.7 - rpn_straddle_thresh: 0.0 - # fluid.layers.generate_proposals in training - # Generate RoIs according to each box with probability to be a foreground - # object. The operation performs following steps: Transposes and resizes - # scores and bbox_deltas; Calculate box locations as proposal candidates; - # Clip boxes to image; Remove predicted boxes with small area; Apply NMS to - # get final proposals as output. 
- train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 2000 - pre_nms_top_n: 2000 - # fluid.layers.generate_proposals in test - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - post_nms_top_n: 1000 - pre_nms_top_n: 1000 - # Size of anchor at the first scale, 32 by default - anchor_start_size: 32 - # Highest level of FPN output, 6 by default - max_level: 6 - # Lowest level of FPN output, 2 by default - min_level: 2 - # Number of FPN output channels, 256 by default - num_chan: 256 - # Number of classes in RPN output, 1 by default - num_classes: 1 - -# RoI extractor module, if use non-FPN architecture, use RoIAlign instead -# For FPN architecture, proposals are distributed to different levels and -# apply roi align at each level. Then concat the outputs. -FPNRoIAlign: - # The canonical FPN feature map level, 4 by default - canconical_level: 4 - # The canonical FPN feature map size, 224 by default - canonical_size: 224 - # The highest level of FPN layer, 5 by default - max_level: 5 - # The lowest level of FPN layer, 2 by default - min_level: 2 - # Number of sampling points, 0 by default - sampling_ratio: 2 - # Box resolution, 7 by default - box_resolution: 7 - # Mask RoI resolution, 14 by default - mask_resolution: 14 - -# Mask head module -# Generate mask output and compute loss mask. -MaskHead: - # Number of convolutions, 4 for FPN, 0 otherwise. 0 by default - num_convs: 4 - # Size of the output mask, 14 by default - resolution: 28 - # Dilation rate, 1 by default - dilation: 1 - # Number of channels after first conv, 256 by default - num_chan_reduced: 256 - # Number of output classes, 81 by default - num_classes: 81 - -# fluid.layers.generate_proposal_labels -# Combine boxes and gt_boxes, and sample foreground proposals and background -# proposals. Then assign classification and regression targets to selected RoIs. 
-BBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: - - 0.1 - - 0.1 - - 0.2 - - 0.2 - bg_thresh_hi: 0.5 - bg_thresh_lo: 0.0 - fg_fraction: 0.25 - fg_thresh: 0.5 - num_classes: 81 - shuffle_before_sample: true - -# fluid.layers.generate_mask_labels -# Given the RoIs and corresponding labels, sample foreground RoIs. -# Assign mask targets to selected RoIs which are encoded to K binary masks -# of resolution M x M. -MaskAssigner: - resolution: 28 - num_classes: 81 - -# BBox head module -# Faster bbox head following the RoI extractor, and apply post process, such as -# NMS and box coder. -BBoxHead: - # Head after RoI extractor, ResNetC5/TwoFCHead - head: TwoFCHead - # fluid.layers.multiclass_nms - # Select a subset of detection bounding boxes that have high scores larger - # than score_threshold. Then prune away boxes that have high IoU overlap - # with already selected boxes by nms_threshold. - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - # fluid.layers.box_coder - box_coder: - axis: 1 - box_normalized: false - code_type: decode_center_size - prior_box_var: - - 0.1 - - 0.1 - - 0.2 - - 0.2 - num_classes: 81 - -# RCNN head with two Fully Connected layers -TwoFCHead: - # The number of output channels, 1024 by default - num_chan: 1024 - -# Learning rate configuration -LearningRate: - # Base learning rate, 0.01 by default - base_lr: 0.01 - # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default - schedulers: - # fluid.layers.piecewise_decay - # `values` has higher priority; if `values` is null, the learning rate is multiplied by gamma at each stage - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 120000 - - 160000 - values: null - # fluid.layers.linear_lr_warmup - # Start learning rate equals to base_lr * start_factor - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -# Optimizer module -OptimizerBuilder: - # fluid.optimizer - optimizer: - momentum: 0.9 - type: Momentum - # fluid.regularizer - regularizer: - 
factor: 0.0001 - type: L2 - -# Data feed module for training -MaskRCNNTrainFeed: - # Batch size per device, 1 by default - batch_size: 1 - # Dataset module - dataset: - # Annotation file path - annotation: annotations/instances_train2017.json - # Dataset directory - dataset_dir: dataset/coco - # Directory where image files are stored - image_dir: train2017 - # List of data fields needed - fields: - - image - - im_info - - im_id - - gt_box - - gt_label - - is_crowd - - gt_mask - # list of image dims - image_shape: - - 3 - - 800 - - 1333 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. - - !DecodeImage - to_rgb: true # default: true - with_mixup: false # default: false - # Flip images randomly - # Transform the x coordinates of bboxes and segmentations - - !RandomFlipImage - is_mask_flip: true # default: false - # Whether bbox is normalized - is_normalized: false # default: false - prob: 0.5 # default: 0.5 - # Normalize the image - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: false - # Whether divide by 255, true by default - is_scale: true - # default: [0.485, 0.456, 0.406] - mean: - - 0.485 - - 0.456 - - 0.406 - # default: [1, 1, 1] - std: - - 0.229 - - 0.224 - - 0.225 - # Rescale image to the specified target size, and capped at max_size - - !ResizeImage - # Resize method, cv2.INTER_LINEAR(1) by default - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true # default: true - # Change the channel - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - to_bgr: false # default: true - # List of batch transformations to use - batch_transforms: - # Pad a batch of samples to same dimensions - - !PadBatch - pad_to_stride: 32 # default: 32 - # Drop last batch if size is uneven, false by default - drop_last: false - # Number of workers processes(or threads), 2 by default - num_workers: 2 - # Number of 
samples, -1 represents all samples. -1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: true - # If update im_info after padding, false by default - use_padded_im_info: false - # If use multi-process, false by default - use_process: false - -# Data feed module for test -MaskRCNNEvalFeed: - # Batch size per device, 1 by default - batch_size: 1 - # Dataset module - dataset: - # Annotation file path - annotation: annotations/instances_val2017.json - # Dataset directory - dataset_dir: dataset/coco - # Directory where image files are stored - image_dir: val2017 - # List of data fields needed - fields: - - image - - im_info - - im_id - - im_shape - # list of image dims - image_shape: - - 3 - - 800 - - 1333 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. - - !DecodeImage - to_rgb: true # default: true - with_mixup: false # default: false - # Normalize the image - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: false - # Whether divide by 255, true by default - is_scale: true - # default: [0.485, 0.456, 0.406] - mean: - - 0.485 - - 0.456 - - 0.406 - # default: [1, 1, 1] - std: - - 0.229 - - 0.224 - - 0.225 - # Rescale image to the specified target size, and capped at max_size - - !ResizeImage - # Resize method, cv2.INTER_LINEAR(1) by default - interp: 1 - max_size: 1333 - target_size: 800 - use_cv2: true # default: true - # Change the channel - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - to_bgr: false # default: true - # List of batch transformations to use - batch_transforms: - # Pad a batch of samples to same dimensions - - !PadBatch - pad_to_stride: 32 # default: 32 - # Drop last batch if size is uneven, false by default - drop_last: false - # Number of workers processes(or threads), 2 by default - num_workers: 2 - # Number of samples, -1 represents all samples. 
-1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: false - # If update im_info after padding, false by default - use_padded_im_info: true - # If use multi-process, false by default - use_process: false - -# Data feed module for test -MaskRCNNTestFeed: - # Batch size per device, 1 by default - batch_size: 1 - # Dataset module - dataset: - # Annotation file path - annotation: dataset/coco/annotations/instances_val2017.json - # List of data fields needed - fields: - - image - - im_info - - im_id - - im_shape - # list of image dims - image_shape: - - 3 - - 800 - - 1333 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. - - !DecodeImage - to_rgb: true # default: true - with_mixup: false # default: false - # Normalize the image - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: false - # Whether divide by 255, true by default - is_scale: true - # default: [0.485, 0.456, 0.406] - mean: - - 0.485 - - 0.456 - - 0.406 - # default: [1, 1, 1] - std: - - 0.229 - - 0.224 - - 0.225 - # Change the channel - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - to_bgr: false # default: true - # List of batch transformations to use - batch_transforms: - # Pad a batch of samples to same dimensions - - !PadBatch - pad_to_stride: 32 # default: 32 - # Drop last batch if size is uneven, false by default - drop_last: false - # Number of workers processes(or threads), 2 by default - num_workers: 2 - # Number of samples, -1 represents all samples. 
-1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: false - # If update im_info after padding, false by default - use_padded_im_info: true - # If use multi-process, false by default - use_process: false - - diff --git a/PaddleCV/PaddleDetection/docs/config_example/ssd_vgg16_300.yml b/PaddleCV/PaddleDetection/docs/config_example/ssd_vgg16_300.yml deleted file mode 100644 index 5a6dc7c210edc0ed2ac550a653463f3142dbc393..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/config_example/ssd_vgg16_300.yml +++ /dev/null @@ -1,427 +0,0 @@ -# Architecture of detection, which is also the prefix of data feed module. -architecture: SSD -# Data feed module. -# Data feed in training. -train_feed: SSDTrainFeed -# Data feed in Evaluation. -eval_feed: SSDEvalFeed -# Data feed in infer. -test_feed: SSDTestFeed -# Use GPU or CPU, true by default. -use_gpu: true -# Maximum number of iteration. -max_iters: 400000 -# Snapshot period. If training and test at same time, evaluate model at each snapshot_iter. 10000 by default. -snapshot_iter: 10000 -# Smooth the log output in specified iterations, 20 by default. -log_smooth_window: 20 -# The log in training is displayed once every period. -log_iter: 20 -# Evaluation method, COCO and VOC are available. -metric: COCO -# Evaluation mAP calculation method in VOC metric, 11point and integral are available. -map_type: 11point -# The path of final model for evaluation and test. -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar -# The directory to save models. -save_dir: output -# The path of final model for evaluation and test. -weights: output/ssd_vgg16_300/model_final -# Number of classes, 81 for COCO and 21 for VOC. -num_classes: 81 - -# SSD architecture, see https://arxiv.org/abs/1512.02325 -SSD: - # backbone instance, defined below. - backbone: VGG - # `MultiBoxHead` instance, defined below. 
- multi_box_head: MultiBoxHead - - # fluid.layers.detection_output, Detection Output Layer for SSD. - # This operation is to get the detection results by performing following two steps: - # 1. Decode input bounding box predictions according to the prior boxes. - # 2. Get the final detection results by applying multi-class non maximum suppression (NMS). - # this operation doesn’t clip the final output bounding boxes to the image window. - output_decoder: - # The index of background label, the background label will be ignored. - # If set to -1, then all categories will be considered. - background_label: 0 - # Number of total bboxes to be kept per image after NMS. - keep_top_k: 200 - # The parameter for adaptive NMS. - nms_eta: 1.0 - # The threshold to be used in NMS. - nms_threshold: 0.45 - # Maximum number of detections to be kept according to the confidences - # aftern the filtering detections based on score_threshold. - nms_top_k: 400 - # Threshold to filter out bounding boxes with low confidence score. - # If not provided, consider all boxes. - score_threshold: 0.01 - -# VGG backbone, see https://arxiv.org/abs/1409.1556 -VGG: - # the VGG net depth (16 or 19 - depth: 16 - # whether or not extra blocks should be added - with_extra_blocks: true - # in each extra block, params: - # [in_channel, out_channel, padding_size, stride_size, filter_size] - extra_block_filters: - - [256, 512, 1, 2, 3] - - [128, 256, 1, 2, 3] - - [128, 256, 0, 1, 3] - - [128, 256, 0, 1, 3] - # params list of init scale in l2 norm, skip init scale if param is -1. - normalizations: [20., -1, -1, -1, -1, -1] - -# fluid.layers.multi_box_head, Generate prior boxes for SSD algorithm. 
-# Generate `prior_box` according to the inputs list and other parameters -# Each position of the input produce N prior boxes, N is determined by -# the count of min_sizes, max_sizes and aspect_ratios, The size of the box -# is in range(min_size, max_size) interval, which is generated in sequence -# according to the aspect_ratios. -MultiBoxHead: - # the base_size is used to get min_size and max_size according to min_ratio and max_ratio. - base_size: 300 - # the aspect ratios of generated prior boxes. The length of input and aspect_ratios must be equal. - aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] - # the min ratio of generated prior boxes. - min_ratio: 15 - # the max ratio of generated prior boxes. - max_ratio: 90 - # If len(inputs) <=2, min_sizes must be set up, and the length of min_sizes - # should equal to the length of inputs. Default: None. - min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0] - # If len(inputs) <=2, max_sizes must be set up, and the length of min_sizes - # should equal to the length of inputs. Default: None. - max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0] - # If step_w and step_h are the same, step_w and step_h can be replaced by steps. - steps: [8, 16, 32, 64, 100, 300] - # Prior boxes center offset. Default: 0.5 - offset: 0.5 - # Whether to flip aspect ratios. Default:False. - flip: true - # The kernel size of conv2d. Default: 1. - kernel_size: 3 - # The padding of conv2d. Default:0. 
- pad: 1 - -# Learning rate configuration -LearningRate: - # Base learning rate, 0.01 by default - base_lr: 0.001 - # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default - schedulers: - # fluid.layers.piecewise_decay - # Values has higher priority and if values is null, learning rate is multipled by gamma at each stage - - !PiecewiseDecay - gamma: 0.1 - milestones: [280000, 360000] - # fluid.layers.linear_lr_warmup - # Start learning rate equals to base_lr * start_factor - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -# Optimizer module -OptimizerBuilder: - # fluid.optimizer, Neural network in essence is a Optimization problem . - # With forward computing and back propagation , Optimizer use back-propagation - # gradients to optimize parameters in a neural network. - optimizer: - # Momentum optimizer adds momentum on the basis of SGD , - # reducing noise problem in the process of random gradient descent. - momentum: 0.9 - type: Momentum - # fluid.regularizer - regularizer: - # implements the L2 Weight Decay Regularization - # Small values of L2 can help prevent over fitting the training data. - factor: 0.0005 - type: L2 - -# Data feed module for training -SSDTrainFeed: - # Batch size per device - batch_size: 16 - # list of batch transformations to use - batch_transforms: [] - # The data buffer size - bufsize: 10 - # Dataset module - dataset: - # Dataset directory - dataset_dir: dataset/coco - # Annotation file path - annotation: annotations/instances_train2017.json - # Directory where image files are stored - image_dir: train2017 - # Drop last batch if size is uneven, false by default - drop_last: true - # List of data fields needed - fields: [image, gt_box, gt_label] - # list of image dims - image_shape: [3, 300, 300] - # number of workers processes (or threads) - num_workers: 8 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. 
- - !DecodeImage - # whether to convert BGR to RGB - to_rgb: true # default: true - # whether or not to mixup image and gt_bbbox/gt_score - with_mixup: false # default: false - # Transform the bounding box's coornidates to [0,1]. - - !NormalizeBox {} - # modify image brightness,contrast,saturation,hue,reordering channels and etc. - - !RandomDistort - # brightness_lower/ brightness_upper (float): the brightness - # between brightness_lower and brightness_upper - brightness_lower: 0.875 - brightness_upper: 1.125 - # brightness_prob (float): the probability of changing brightness - brightness_prob: 0.5 - # contrast_lower/ contrast_upper (float): the contrast between - # contrast_lower and contrast_lower - contrast_lower: 0.5 - contrast_upper: 1.5 - # contrast_prob (float): the probability of changing contrast - contrast_prob: 0.5 - # count (int): the kinds of doing distrot - count: 4 - # hue_lower/ hue_upper (float): the hue between hue_lower and hue_upper - hue_lower: -18 - hue_upper: 18 - # hue_prob (float): the probability of changing hue - hue_prob: 0.5 - # is_order (bool): whether determine the order of distortion - is_order: true - # saturation_lower/ saturation_upper (float): the saturation - # between saturation_lower and saturation_upper - saturation_lower: 0.5 - saturation_upper: 1.5 - # saturation_prob (float): the probability of changing saturation - saturation_prob: 0.5 - #Expand the image and modify bounding box. - # Operators: - # 1. Scale the image weight and height. - # 2. Construct new images with new height and width. - # 3. Fill the new image with the mean. - # 4. Put original imge into new image. - # 5. Rescale the bounding box. - # 6. Determine if the new bbox is satisfied in the new image. - - !ExpandImage - # max_ratio (float): the ratio of expanding - max_ratio: 4 - # mean (list): the pixel mean - mean: [104, 117, 123] - # prob (float): the probability of expanding image - prob: 0.5 - # Crop the image and modify bounding box. 
- # Operators: - # 1. Scale the image weight and height. - # 2. Crop the image according to a radom sample. - # 3. Rescale the bounding box. - # 4. Determine if the new bbox is satisfied in the new image. - - !CropImage - # avoid_no_bbox (bool): whether to to avoid the - # situation where the box does not appear. - avoid_no_bbox: false - # batch_sampler (list): Multiple sets of different parameters for cropping. - batch_sampler: - - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0] - - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0] - # satisfy_all (bool): whether all boxes must satisfy. - satisfy_all: false - # Rescale image to the specified target size, and capped at max_size if max_size != 0. - # If target_size is list, selected a scale randomly as the specified target size. - - !ResizeImage - # Resize method, cv2.INTER_LINEAR(1) by default - interp: 1 - # max_size (int): the max size of image - max_size: 0 - # target_size (int|list): the target size of image's short side, - # multi-scale training is adopted when type is list. - target_size: 300 - # use_cv2 (bool): use the cv2 interpolation method or use PIL interpolation method - use_cv2: false - # Filp the image and bounding box. - # Operators: - # 1. Flip the image numpy. - # 2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!) - # 3. Transform the segmentations' x coordinates. (Must judge whether the coordinates are normalized!) 
- - !RandomFlipImage - # is_mask_flip (bool): whether flip the segmentation - is_mask_flip: false - # is_normalized (bool): whether the bbox scale to [0,1] - is_normalized: true - # prob (float): the probability of flipping image - prob: 0.5 - # Change the channel - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - # to_bgr (bool): confirm whether to convert RGB to BGR - to_bgr: true - # Normalize the image. - # Operators: - # 1.(optional) Scale the image to [0,1] - # 2. Each pixel minus mean and is divided by std - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: true - # Whether divide by 255, true by default - is_scale: false - # mean (list): the pixel mean - mean: [104, 117, 123] - # std (list): the pixel variance - std: [1, 1, 1] - # Number of samples, -1 represents all samples. -1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: true - # If use multi-process, false by default - use_process: true - -# Data feed module for Eval -SSDEvalFeed: - # Batch size per device - batch_size: 32 - # list of batch transformations to use - batch_transforms: [] - # The data buffer size - bufsize: 10 - # Dataset module - dataset: - # Dataset directory - dataset_dir: dataset/coco - # Annotation file path - annotation: annotations/instances_val2017.json - # Directory where image files are stored - image_dir: val2017 - # Drop last batch if size is uneven, false by default - drop_last: true - # List of data fields needed - fields: [image, im_shape, im_id, gt_box, gt_label, is_difficult] - # list of image dims - image_shape: [3, 300, 300] - # number of workers processes (or threads) - num_workers: 8 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. 
- - !DecodeImage - # whether to convert BGR to RGB - to_rgb: true # default: true - # whether or not to mixup image and gt_bbbox/gt_score - with_mixup: false # default: false - # Transform the bounding box's coornidates to [0,1]. - - !NormalizeBox {} - # Rescale image to the specified target size, and capped at max_size if max_size != 0. - # If target_size is list, selected a scale randomly as the specified target size. - - !ResizeImage - # Resize method, cv2.INTER_LINEAR(1) by default - interp: 1 - # max_size (int): the max size of image - max_size: 0 - # target_size (int|list): the target size of image's short side, - # multi-scale training is adopted when type is list. - target_size: 300 - # use_cv2 (bool): use the cv2 interpolation method or use PIL interpolation method - use_cv2: false - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - # to_bgr (bool): confirm whether to convert RGB to BGR - to_bgr: true - # Normalize the image. - # Operators: - # 1.(optional) Scale the image to [0,1] - # 2. Each pixel minus mean and is divided by std - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: true - # Whether divide by 255, true by default - is_scale: false - # mean (list): the pixel mean - mean: [104, 117, 123] - # std (list): the pixel variance - std: [1, 1, 1] - # Number of samples, -1 represents all samples. 
-1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: false - # If use multi-process, false by default - use_process: false - -# Data feed module for test -SSDTestFeed: - # Batch size per device - batch_size: 1 - # list of batch transformations to use - batch_transforms: [] - # The data buffer size - bufsize: 10 - # Dataset module - dataset: - # Annotation file path - annotation: dataset/coco/annotations/instances_val2017.json - # Drop last batch if size is uneven, false by default - drop_last: false - # List of data fields needed - fields: [image, im_id] - # list of image dims - image_shape: [3, 300, 300] - # number of workers processes (or threads) - num_workers: 8 - # List of sample transformations to use - sample_transforms: - # Transform the image data to numpy format. - - !DecodeImage - # whether to convert BGR to RGB - to_rgb: true # default: true - # whether or not to mixup image and gt_bbbox/gt_score - with_mixup: false # default: false - # Rescale image to the specified target size, and capped at max_size if max_size != 0. - # If target_size is list, selected a scale randomly as the specified target size. - - !ResizeImage - # Resize method, cv2.INTER_LINEAR(1) by default - interp: 1 - # max_size (int): the max size of image - max_size: 0 - # target_size (int|list): the target size of image's short side, - # multi-scale training is adopted when type is list. - target_size: 300 - # use_cv2 (bool): use the cv2 interpolation method or use PIL interpolation method - use_cv2: false - - !Permute - # The format of image, [H, W, C]/[C, H, W], true by default - channel_first: true - # to_bgr (bool): confirm whether to convert RGB to BGR - to_bgr: true - # Normalize the image. - # Operators: - # 1.(optional) Scale the image to [0,1] - # 2. 
Each pixel minus mean and is divided by std - - !NormalizeImage - # The format of image, [H, W, C]/[C, H, W], true by default - is_channel_first: true - # Whether divide by 255, true by default - is_scale: false - # mean (list): the pixel mean - mean: [104, 117, 123] - # std (list): the pixel variance - std: [1, 1, 1] - # Number of samples, -1 represents all samples. -1 by default - samples: -1 - # If samples should be shuffled, true by default - shuffle: false - # If use multi-process, false by default - use_process: false diff --git a/PaddleCV/PaddleDetection/docs/config_example/yolov3_darknet.yml b/PaddleCV/PaddleDetection/docs/config_example/yolov3_darknet.yml deleted file mode 100644 index 65f479b72bb2504f84bbed5fed20afd78bad0ea9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/docs/config_example/yolov3_darknet.yml +++ /dev/null @@ -1,323 +0,0 @@ -# Architecture of detection, which is also the prefix of data feed module -architecture: YOLOv3 - -# Data feed module. -train_feed: YoloTrainFeed -eval_feed: YoloEvalFeed -test_feed: YoloTestFeed - -# Use GPU or CPU, true by default. -use_gpu: true - -# Maximum number of iteration. -# In YOLOv3 model, default iteration number is to train for 270 epoches. -max_iters: 500200 - -# Smooth the log output in specified iterations, 20 by default. -log_smooth_window: 20 - -# The number of iteration interval to display in training log. -log_iter: 20 - -# The directory to save models. -save_dir: output - -# Snapshot period. If training and test at same time, evaluate model at each snapshot_iter. 2000 by default. -snapshot_iter: 2000 - -# Evalution method, COCO and VOC are available. -metric: COCO - -# The path of oretrained wegiths. If url is provided, it will be downloaded and decompressed automatically. -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar -# The path of final model for evaluation and test. 
-weights: output/yolov3_darknet/model_final - -# Number of classes, 80 for COCO and 20 for VOC. -num_classes: 80 - - -# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767 -YOLOv3: - backbone: DarkNet - yolo_head: YOLOv3Head - -# Backbone module -DarkNet: - # Batch normalization type in training, sync_bn for synchronized batch normalization - norm_type: sync_bn - # L2 weight decay factor of batch normalization layer - norm_decay: 0. - # Darknet convolution layer number, only support 53 currently - depth: 53 - -# YOLOv3 head module -# Generate bbox output in evaluation and calculate loss in training -# fluid.layers.yolov3_loss / fluid.layers.yolo_box -YOLOv3Head: - # anchor mask of 3 yolo_loss/yolo_box layers, each yolo_loss/yolo_box layer has 3 anchors - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - # 9 anchors for 3 yolo_loss/yolo_box layer, generated by perform kmeans on COCO gtboxes - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - # L2 weight decay factor of batch normalization layer - norm_decay: 0. - # Ignore threshold for yolo_loss layer, 0.7 by default. - # Objectness loss will be ignored if a predcition bbox overlap a gtbox over ignore_thresh. - ignore_thresh: 0.7 - # Whether use label smooth in yolo_loss layer - # It is recommended to set as true when only num_classes is very big - label_smooth: true - # fluid.layers.multiclass_nms - # Non-max suppress for output prediction boxes, see multiclass_nms for following parameters. - # 1. Select detection bounding boxes with high scores larger than score_threshold. - # 2. Select detection bounding boxes with the largest nms_top_k scores. - # 3. Suppress detection bounding boxes which have high IoU overlap witch already selected boxes. - # 4. Keep the top keep_top_k detection bounding boxes as output. - nms: - # Which label is regard as backgroud and will be ignored, -1 for no backgroud label. 
- background_label: -1 - # Number of total bboxes to be kept per image after NMS step. - keep_top_k: 100 - # IoU threshold for NMS, bbox with IoU over nms_threshold will be suppressed. - nms_threshold: 0.45 - # Maximum number of detections to be kept according to the confidences after the filtering detections based on score_threshold. - nms_top_k: 1000 - # Whether detections are normalized. - normalized: false - # Threshold to filter out bounding boxes with low confidence score. - score_threshold: 0.01 - -# Learning rate configuration -LearningRate: - # Base learning rate for training, 1e-3 by default. - base_lr: 0.001 - # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default - schedulers: - # fluid.layers.piecewise_decay - # each milestone stage decay gamma - - !PiecewiseDecay - gamma: 0.1 - milestones: - - 400000 - - 450000 - # fluid.layers.linear_lr_warmup - # Start learning rate equals to base_lr * start_factor - - !LinearWarmup - start_factor: 0. - steps: 4000 - -# Optimizer module -OptimizerBuilder: - # fluid.optimizer - optimizer: - momentum: 0.9 - type: Momentum - # fluid.regularizer - regularizer: - factor: 0.0005 - type: L2 - -# Data feed module for training -YoloTrainFeed: - # Batch size per device, 8 by default - batch_size: 8 - # Dataset module - dataset: - # Dataset directory. - dataset_dir: dataset/coco - # Annotation file path. - annotation: annotations/instances_train2017.json - # Directory where image files are stored. - image_dir: train2017 - # List of data fields needed. - fields: [image, gt_box, gt_label, gt_score] - # List of image dims - image_shape: [3, 608, 608] - # List of sample transformations to use. - sample_transforms: - # read image data and decode to numpy. - - !DecodeImage - to_rgb: true - # YOLOv3 use image mixup in training. - with_mixup: true - # Mixup two images in training, a trick to improve performance. 
- - !MixupImage - alpha: 1.5 # default: 1.5 - beta: 1.5 # default: 1.5 - # Normalize gtbox to range [0, 1] - - !NormalizeBox {} - # Random color distort: brightness, contrast, hue, saturation. - - !RandomDistort - brightness_lower: 0.5 - brightness_prob: 0.5 - brightness_upper: 1.5 - contrast_lower: 0.5 - contrast_prob: 0.5 - contrast_upper: 1.5 - count: 4 - hue_lower: -18 - hue_prob: 0.5 - hue_upper: 18 - is_order: false - saturation_lower: 0.5 - saturation_prob: 0.5 - saturation_upper: 1.5 - # Random Expand the image and modify bounding box. - # Operators: - # 1. Scale the image weight and height. - # 2. Construct new images with new height and width. - # 3. Fill the new image with the mean. - # 4. Put original imge into new image. - # 5. Rescale the bounding box. - # 6. Determine if the new bbox is satisfied in the new image. - - !ExpandImage - # max expand ratio, default 4.0. - max_ratio: 4.0 - mean: [123.675, 116.28, 103.53] - prob: 0.5 - # Random Crop the image and modify bounding box. - # Operators: - # 1. Scale the image weight and height. - # 2. Crop the image according to a radom sample. - # 3. Rescale the bounding box. - # 4. Determine if the new bbox is satisfied in the new image. - - !CropImage - # Recrop image if there are no bbox in output cropped image. - avoid_no_bbox: true - batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]] - # Whether should all bbox satisfy IoU constrains. - satisfy_all: false - # Interpolate image to target_size with random interpolate method: - # cv2.INTER_NEAREST, - # cv2.INTER_LINEAR, - # cv2.INTER_AREA, - # cv2.INTER_CUBIC, - # cv2.INTER_LANCZOS4, - - !RandomInterpImage - max_size: 0 - target_size: 608 - # Filp the image and bounding box. - # Operators: - # 1. 
Flip the image numpy. - # 2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!) - # 3. Transform the segmentations' x coordinates. (Must judge whether the coordinates are normalized!) - - !RandomFlipImage - is_mask_flip: false - is_normalized: true - prob: 0.5 - # Normalize the image. - # Operators: - # 1.(optional) Scale the image to [0,1] - # 2. Each pixel minus mean and is divided by std - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - # Change data layout to [C, H, W]. - - !Permute - channel_first: true - to_bgr: false - # List of batch transformations to use. - batch_transforms: - # Random reshape images in each mini-batch to different shapes. - - !RandomShape - sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] - # YOLOv3 read gtbox into zero padded tensor with max box number as 50. - num_max_boxes: 50 - # YOLOv3 read gtlabel without regarding backgroud as label 0. - with_background: false - # Number of samples, -1 represents all samples. -1 by default. - samples: -1 - # Whether samples should be shuffled, true by default. - shuffle: true - # Whether drop last images which less than a batch. - drop_last: true - # Whether use multi-process reader in training. - use_process: true - # Use multi-process reader number. - num_workers: 8 - # Buffer size for reader. - bufsize: 128 - # Mixup image epoch number. - mixup_epoch: 250 - -# Data feed module for evaluation -YoloEvalFeed: - batch_size: 8 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: [] - fields: [image, im_size, im_id, gt_box, gt_label, is_difficult] - image_shape: [3, 608, 608] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - # Rescale image to the specified target size, and capped at max_size if max_size != 0. 
- # If target_size is list, selected a scale randomly as the specified target size. - - !ResizeImage - interp: 2 # 2 for cv2.INTER_CUBIC - max_size: 0 - target_size: 608 - use_cv2: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - - !Permute - channel_first: true - to_bgr: false - num_max_boxes: 50 - samples: -1 - shuffle: false - drop_last: false - # Use multi-thread reader in evaluation mode. - use_process: false - # Thread number for multi-thread reader. - num_workers: 8 - with_background: false - -# Data feed module for test -YoloTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: [] - fields: [image, im_size, im_id] - sample_transforms: - - !DecodeImage - to_rgb: true - with_mixup: false - - !ResizeImage - interp: 2 - max_size: 0 - target_size: 608 - use_cv2: true - - !NormalizeImage - is_channel_first: false - is_scale: true - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - - !Permute - channel_first: true - to_bgr: false - num_max_boxes: 50 - samples: -1 - shuffle: false - drop_last: false - # Use multi-thread reader in test mode. 
- use_process: false - num_workers: 8 - with_background: false diff --git a/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png b/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png deleted file mode 100644 index f81600b14cbe9af4b12f86c574ce2dffa937774f..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/docs/images/bench_ssd_yolo_infer.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/CMakeLists.txt b/PaddleCV/PaddleDetection/inference/CMakeLists.txt deleted file mode 100644 index ed610da047316d0b08d73d51e0223a06180b4026..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/CMakeLists.txt +++ /dev/null @@ -1,272 +0,0 @@ -cmake_minimum_required(VERSION 3.0) -project(cpp_inference_demo CXX C) -message("cmake module path: ${CMAKE_MODULE_PATH}") -message("cmake root path: ${CMAKE_ROOT}") -option(WITH_MKL "Compile demo with MKL/OpenBlas support,defaultuseMKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) -option(USE_TENSORRT "Compile demo with TensorRT." 
OFF) - -SET(PADDLE_DIR "" CACHE PATH "Location of libraries") -SET(OPENCV_DIR "" CACHE PATH "Location of libraries") -SET(CUDA_LIB "" CACHE PATH "Location of libraries") - - -include(external-cmake/yaml-cpp.cmake) - -macro(safe_set_static_flag) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) -endmacro() - -if (WITH_MKL) - ADD_DEFINITIONS(-DUSE_MKL) -endif() - -if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "") - message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir") -endif() - -if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "") - message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv") -endif() - -include_directories("${CMAKE_SOURCE_DIR}/") -include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include") -include_directories("${PADDLE_DIR}/") -include_directories("${PADDLE_DIR}/third_party/install/protobuf/include") -include_directories("${PADDLE_DIR}/third_party/install/glog/include") -include_directories("${PADDLE_DIR}/third_party/install/gflags/include") -include_directories("${PADDLE_DIR}/third_party/install/xxhash/include") -if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include") - include_directories("${PADDLE_DIR}/third_party/install/snappy/include") -endif() -if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") - include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") -endif() -include_directories("${PADDLE_DIR}/third_party/install/zlib/include") -include_directories("${PADDLE_DIR}/third_party/boost") -include_directories("${PADDLE_DIR}/third_party/eigen3") - -if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - 
link_directories("${PADDLE_DIR}/third_party/install/snappy/lib") -endif() -if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") -endif() - -link_directories("${PADDLE_DIR}/third_party/install/zlib/lib") -link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") -link_directories("${PADDLE_DIR}/third_party/install/glog/lib") -link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") -link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib") -link_directories("${PADDLE_DIR}/paddle/lib/") -link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib") -link_directories("${CMAKE_CURRENT_BINARY_DIR}") -if (WIN32) - include_directories("${PADDLE_DIR}/paddle/fluid/inference") - link_directories("${PADDLE_DIR}/paddle/fluid/inference") - include_directories("${OPENCV_DIR}/build/include") - include_directories("${OPENCV_DIR}/opencv/build/include") - link_directories("${OPENCV_DIR}/build/x64/vc14/lib") -else () - include_directories("${PADDLE_DIR}/paddle/include") - link_directories("${PADDLE_DIR}/paddle/lib") - include_directories("${OPENCV_DIR}/include") - link_directories("${OPENCV_DIR}/lib") -endif () - -if (WIN32) - add_definitions("/DGOOGLE_GLOG_DLL_DECL=") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") - if (WITH_STATIC_LIB) - safe_set_static_flag() - add_definitions(-DSTATIC_LIB) - endif() -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11") - set(CMAKE_STATIC_LIBRARY_PREFIX "") -endif() - -# TODO let users define cuda lib path -if (WITH_GPU) - if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "") - message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64") - endif() - if (NOT WIN32) - if (NOT DEFINED 
CUDNN_LIB) - message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64") - endif() - endif(NOT WIN32) -endif() - - -if (NOT WIN32) - if (USE_TENSORRT AND WITH_GPU) - include_directories("${PADDLE_DIR}/third_party/install/tensorrt/include") - link_directories("${PADDLE_DIR}/third_party/install/tensorrt/lib") - endif() -endif(NOT WIN32) - -if (NOT WIN32) - set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph") - if(EXISTS ${NGRAPH_PATH}) - include(GNUInstallDirs) - include_directories("${NGRAPH_PATH}/include") - link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}") - set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif() -endif() - -if(WITH_MKL) - include_directories("${PADDLE_DIR}/third_party/install/mklml/include") - if (WIN32) - set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib - ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib) - else () - set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} - ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif () - set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn") - if(EXISTS ${MKLDNN_PATH}) - include_directories("${MKLDNN_PATH}/include") - if (WIN32) - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) - else () - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) - endif () - endif() -else() - set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) -endif() - -if(WITH_STATIC_LIB) - if (WIN32) - set(DEPS - ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) - else () - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() -else() - if (WIN32) - set(DEPS - ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) - else () - set(DEPS - 
${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif() -endif() - -if (NOT WIN32) - set(EXTERNAL_LIB "-lrt -ldl -lpthread") - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags protobuf yaml-cpp z xxhash - ${EXTERNAL_LIB}) - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - set(DEPS ${DEPS} snappystream) - endif() - if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - set(DEPS ${DEPS} snappy) - endif() -else() - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - opencv_world346 glog libyaml-cppmt gflags_static libprotobuf zlibstatic xxhash ${EXTERNAL_LIB}) - set(DEPS ${DEPS} libcmt shlwapi) - if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - set(DEPS ${DEPS} snappy) - endif() - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - set(DEPS ${DEPS} snappystream) - endif() -endif(NOT WIN32) - -if(WITH_GPU) - if(NOT WIN32) - if (USE_TENSORRT) - set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() - set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) - else() - set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() -endif() - -if (NOT WIN32) - set(OPENCV_LIB_DIR ${OPENCV_DIR}/lib) - if(EXISTS "${OPENCV_LIB_DIR}") - message("OPENCV_LIB:" ${OPENCV_LIB_DIR}) - else() - set(OPENCV_LIB_DIR ${OPENCV_DIR}/lib64) - message("OPENCV_LIB:" ${OPENCV_LIB_DIR}) - endif() - - set(OPENCV_3RD_LIB_DIR ${OPENCV_DIR}/share/OpenCV/3rdparty/lib) - if(EXISTS "${OPENCV_3RD_LIB_DIR}") - message("OPENCV_3RD_LIB_DIR:" ${OPENCV_3RD_LIB_DIR}) - else() - set(OPENCV_3RD_LIB_DIR 
${OPENCV_DIR}/share/OpenCV/3rdparty/lib64) - message("OPENCV_3RD_LIB_DIR:" ${OPENCV_3RD_LIB_DIR}) - endif() - - set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_imgcodecs${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_imgproc${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_core${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_LIB_DIR}/libopencv_highgui${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libIlmImf${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibjasper${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibpng${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibtiff${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libittnotify${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibjpeg-turbo${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/liblibwebp${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libzlib${CMAKE_STATIC_LIBRARY_SUFFIX}) - if(EXISTS "${OPENCV_3RD_LIB_DIR}/libippiw${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libippiw${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() - if(EXISTS "${OPENCV_3RD_LIB_DIR}/libippicv${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(DEPS ${DEPS} ${OPENCV_3RD_LIB_DIR}/libippicv${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() -endif() -# message(${CMAKE_CXX_FLAGS}) -# set(CMAKE_CXX_FLAGS "-g ${CMAKE_CXX_FLAGS}") - -SET(PADDLESEG_INFERENCE_SRCS preprocessor/preprocessor.cpp - preprocessor/preprocessor_detection.cpp predictor/detection_predictor.cpp - utils/detection_result.pb.cc) - -ADD_LIBRARY(libpaddleseg_inference STATIC ${PADDLESEG_INFERENCE_SRCS}) -target_link_libraries(libpaddleseg_inference ${DEPS}) - -add_executable(detection_demo detection_demo.cpp) - -ADD_DEPENDENCIES(libpaddleseg_inference ext-yaml-cpp) -ADD_DEPENDENCIES(detection_demo 
ext-yaml-cpp libpaddleseg_inference) -target_link_libraries(detection_demo ${DEPS} libpaddleseg_inference) - -if (WIN32) - add_custom_command(TARGET detection_demo POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll - ) -endif() - -execute_process(COMMAND cp -r ${CMAKE_SOURCE_DIR}/images ${CMAKE_SOURCE_DIR}/conf ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/PaddleCV/PaddleDetection/inference/LICENSE b/PaddleCV/PaddleDetection/inference/LICENSE deleted file mode 100644 index 261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/PaddleCV/PaddleDetection/inference/README.md b/PaddleCV/PaddleDetection/inference/README.md deleted file mode 100644 index 302b5fb3818df8c1ca871095ff368129ce1292fd..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/README.md +++ /dev/null @@ -1,171 +0,0 @@ -# PaddleDetection C++预测部署方案 - -## 本文档结构 - -[1.说明](#1说明) - -[2.主要目录和文件](#2主要目录和文件) - -[3.编译](#3编译) - -[4.预测并可视化结果](#4预测并可视化结果) - - - - -## 1.说明 - -本目录提供一个跨平台的图像检测模型的C++预测部署方案,用户通过一定的配置,加上少量的代码,即可把模型集成到自己的服务中,完成相应的图像检测任务。 - -主要设计的目标包括以下四点: -- 跨平台,支持在 Windows 和 Linux 完成编译、开发和部署 -- 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑 -- 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化 -- 支持多种常见的图像检测模型,如YOLOv3, Faster-RCNN, Faster-RCNN+FPN,用户通过少量配置即可加载模型完成常见检测任务 - -## 2.主要目录和文件 - -```bash -deploy -├── detection_demo.cpp # 完成图像检测预测任务C++代码 -│ -├── conf -│ ├── detection_rcnn.yaml #示例faster rcnn 目标检测配置 -│ └── detection_rcnn_fpn.yaml #示例faster rcnn + fpn目标检测配置 -├── images -│ └── detection_rcnn # 示例faster rcnn + fpn目标检测测试图片目录 -├── tools -│ └── vis.py # 示例图像检测结果可视化脚本 -├── docs -│ ├── linux_build.md # Linux 编译指南 -│ ├── windows_vs2015_build.md # windows VS2015编译指南 -│ └── windows_vs2019_build.md # Windows VS2019编译指南 -│ -├── utils # 一些基础公共函数 -│ -├── preprocess # 数据预处理相关代码 -│ -├── predictor # 模型加载和预测相关代码 -│ -├── CMakeList.txt # cmake编译入口文件 -│ -└── external-cmake # 依赖的外部项目cmake(目前仅有yaml-cpp) - -``` - -## 3.编译 
-支持在`Windows`和`Linux`平台编译和使用: -- [Linux 编译指南](./docs/linux_build.md) -- [Windows 使用 Visual Studio 2019 Community 编译指南](./docs/windows_vs2019_build.md) -- [Windows 使用 Visual Studio 2015 编译指南](./docs/windows_vs2015_build.md) - -`Windows`上推荐使用最新的`Visual Studio 2019 Community`直接编译`CMake`项目。 - -## 4.预测并可视化结果 - -完成编译后,便生成了需要的可执行文件和链接库。这里以我们基于`faster rcnn`检测模型为例,介绍部署图像检测模型的通用流程。 - -### 1. 下载模型文件 -我们提供faster rcnn,faster rcnn+fpn模型用于预测coco17数据集,可在以下链接下载:[faster rcnn示例模型下载地址](https://paddleseg.bj.bcebos.com/inference/faster_rcnn_pp50.zip), - [faster rcnn + fpn示例模型下载地址](https://paddleseg.bj.bcebos.com/inference/faster_rcnn_pp50_fpn.zip)。 - -下载并解压,解压后目录结构如下: -``` -faster_rcnn_pp50/ -├── __model__ # 模型文件 -│ -└── __params__ # 参数文件 -``` -解压后把上述目录拷贝到合适的路径: - -**假设**`Windows`系统上,我们模型和参数文件所在路径为`D:\projects\models\faster_rcnn_pp50`。 - -**假设**`Linux`上对应的路径则为`/root/projects/models/faster_rcnn_pp50/`。 - - -### 2. 修改配置 - -`inference`源代码(即本目录)的`conf`目录下提供了示例基于faster rcnn的配置文件`detection_rcnn.yaml`, 相关的字段含义和说明如下: - -```yaml -DEPLOY: - # 是否使用GPU预测 - USE_GPU: 1 - # 模型和参数文件所在目录路径 - MODEL_PATH: "/root/projects/models/faster_rcnn_pp50" - # 模型文件名 - MODEL_FILENAME: "__model__" - # 参数文件名 - PARAMS_FILENAME: "__params__" - # 预测图片的标准输入,尺寸不一致会resize - EVAL_CROP_SIZE: (608, 608) - # resize方式,支持 UNPADDING和RANGE_SCALING - RESIZE_TYPE: "RANGE_SCALING" - # 短边对齐的长度,仅在RANGE_SCALING下有效 - TARGET_SHORT_SIZE : 800 - # 均值 - MEAN: [0.4647, 0.4647, 0.4647] - # 方差 - STD: [0.0834, 0.0834, 0.0834] - # 图片类型, rgb或者rgba - IMAGE_TYPE: "rgb" - # 像素分类数 - NUM_CLASSES: 1 - # 通道数 - CHANNELS : 3 - # 预处理器, 目前提供图像检测的通用处理类DetectionPreProcessor - PRE_PROCESSOR: "DetectionPreProcessor" - # 预测模式,支持 NATIVE 和 ANALYSIS - PREDICTOR_MODE: "ANALYSIS" - # 每次预测的 batch_size - BATCH_SIZE : 3 - # 长边伸缩的最大长度,-1代表无限制。 - RESIZE_MAX_SIZE: 1333 - # 输入的tensor数量。 - FEEDS_SIZE: 3 - -``` -修改字段`MODEL_PATH`的值为你在**上一步**下载并解压的模型文件所放置的目录即可。更多配置文件字段介绍,请参考文档[预测部署方案配置文件说明](./docs/configuration.md)。 - -### 3. 
执行预测 - -在终端中切换到生成的可执行文件所在目录为当前目录(Windows系统为`cmd`)。 - -`Linux` 系统中执行以下命令: -```shell -./detection_demo --conf=conf/detection_rcnn.yaml --input_dir=images/detection_rcnn -``` -`Windows` 中执行以下命令: -```shell -.\detection_demo.exe --conf=conf\detection_rcnn.yaml --input_dir=images\detection_rcnn\ -``` - - -预测使用的两个命令参数说明如下: - -| 参数 | 含义 | -|-------|----------| -| conf | 模型配置的Yaml文件路径 | -| input_dir | 需要预测的图片目录 | - -· -配置文件说明请参考上一步,样例程序会扫描input_dir目录下的所有图片,并为每一张图片生成对应的预测结果,输出到屏幕,并在`X`同一目录下保存到`X.pb文件`(X为对应图片的文件名)。可使用工具脚本vis.py将检测结果可视化。 - -**检测结果可视化** - -运行可视化脚本时,只需输入命令行参数图片路径、检测结果pb文件路径、目标框阈值以及类别-标签映射文件路径即可得到可视化的图片`X.png` (tools目录下提供coco17的类别标签映射文件coco17.json)。 - -```bash -python vis.py --img_path=../build/images/detection_rcnn/000000087038.jpg --img_result_path=../build/images/detection_rcnn/000000087038.jpg.pb --threshold=0.1 --c2l_path=coco17.json -``` - -检测结果(每个图片的结果用空行隔开) - -```原图:``` - -![原图](./demo_images/000000087038.jpg) - -```检测结果图:``` - -![检测结果](./demo_images/000000087038.jpg.png) - diff --git a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml deleted file mode 100644 index 50c23fbb3e53ff159844e65da4ed194e169cffb6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn.yaml +++ /dev/null @@ -1,18 +0,0 @@ -DEPLOY: - USE_GPU: 1 - MODEL_PATH: "/root/projects/models/faster_rcnn_pp50" - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" - EVAL_CROP_SIZE: (608, 608) - RESIZE_TYPE: "RANGE_SCALING" - TARGET_SHORT_SIZE : 800 - MEAN: [0.485, 0.456, 0.406] - STD: [0.229, 0.224, 0.225] - IMAGE_TYPE: "rgb" - NUM_CLASSES: 1 - CHANNELS : 3 - PRE_PROCESSOR: "DetectionPreProcessor" - PREDICTOR_MODE: "ANALYSIS" - BATCH_SIZE : 3 - RESIZE_MAX_SIZE: 1333 - FEEDS_SIZE: 3 diff --git a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml b/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml deleted file mode 100644 index 
9d6635ef8c2b29fb0ca9318d1ec08f1f7be037f7..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/conf/detection_rcnn_fpn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -DEPLOY: - USE_GPU: 1 - MODEL_PATH: "/root/projects/models/faster_rcnn_pp50_fpn" - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" - EVAL_CROP_SIZE: (608, 608) - RESIZE_TYPE: "RANGE_SCALING" - TARGET_SHORT_SIZE : 800 - MEAN: [0.485, 0.456, 0.406] - STD: [0.229, 0.224, 0.225] - IMAGE_TYPE: "rgb" - NUM_CLASSES: 1 - CHANNELS : 3 - PRE_PROCESSOR: "DetectionPreProcessor" - PREDICTOR_MODE: "ANALYSIS" - BATCH_SIZE : 1 - RESIZE_MAX_SIZE: 1333 - FEEDS_SIZE: 3 - COARSEST_STRIDE: 32 diff --git a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg deleted file mode 100644 index 9f77f5d5f057b6f92dc096da704ecb8dee99bdf5..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png b/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png deleted file mode 100644 index aa2c63d1c3dd1ca08d517239842ce5bd40310d01..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/inference/demo_images/000000087038.jpg.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/detection_demo.cpp b/PaddleCV/PaddleDetection/inference/detection_demo.cpp deleted file mode 100644 index 7e711ed6970358c528a3198bb6168a871d83d380..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/detection_demo.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include - -DEFINE_string(conf, "", "Configuration File Path"); -DEFINE_string(input_dir, "", "Directory of Input Images"); - -int main(int argc, char** argv) { - // 0. parse args - google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_conf.empty() || FLAGS_input_dir.empty()) { - std::cout << "Usage: ./predictor --conf=/config/path/to/your/model --input_dir=/directory/of/your/input/images"; - return -1; - } - // 1. create a predictor and init it with conf - PaddleSolution::DetectionPredictor predictor; - if (predictor.init(FLAGS_conf) != 0) { - LOG(FATAL) << "Fail to init predictor"; - return -1; - } - - // 2. get all the images with extension '.jpeg' at input_dir - auto imgs = PaddleSolution::utils::get_directory_images(FLAGS_input_dir, ".jpeg|.jpg|.JPEG|.JPG|.bmp|.BMP|.png|.PNG"); - - // 3. 
predict - predictor.predict(imgs); - return 0; -} diff --git a/PaddleCV/PaddleDetection/inference/docs/configuration.md b/PaddleCV/PaddleDetection/inference/docs/configuration.md deleted file mode 100644 index cb2f761f792009c1accb52048d6e4f2cdcb6ad29..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/docs/configuration.md +++ /dev/null @@ -1,75 +0,0 @@ -# 预测部署方案配置文件说明 -## 基本概念 -预测部署方案的配置文件旨在给用户提供一个预测部署方案定制化接口。用户仅需理解该配置文件相关字段的含义,无需编写任何代码,即可定制化预测部署方案。为了更好地表达每个字段的含义,首先介绍配置文件中字段的类型。 - -### 字段类型 -- **required**: 表明该字段必须显式定义,否则无法正常启动预测部署程序。 -- **optional**: 表明该字段可忽略不写,预测部署系统会提供默认值,相关默认值将在下文介绍。 - -### 字段值类型 -- **int**:表明该字段必须赋予整型类型的值。 -- **string**:表明该字段必须赋予字符串类型的值。 -- **list**:表明该字段必须赋予列表的值。 -- **tuple**: 表明该字段必须赋予双元素元组的值。 - -## 字段介绍 - -```yaml -# 预测部署时所有配置字段需在DEPLOY字段下 -DEPLOY: - # 类型:required int - # 含义:是否使用GPU预测。 0:不使用 1:使用 - USE_GPU: 1 - # 类型:required string - # 含义:模型和参数文件所在目录 - MODEL_PATH: "/path/to/model_directory" - # 类型:required string - # 含义:模型文件名 - MODEL_FILENAME: "__model__" - # 类型:required string - # 含义:参数文件名 - PARAMS_FILENAME: "__params__" - # 类型:optional string - # 含义:图像resize的类型。支持 UNPADDING 和 RANGE_SCALING模式。默认是UNPADDING模式。 - RESIZE_TYPE: "UNPADDING" - # 类型:required tuple - # 含义:当使用UNPADDING模式时,会将图像直接resize到该尺寸。 - EVAL_CROP_SIZE: (513, 513) - # 类型:optional int - # 含义:当使用RANGE_SCALING模式时,图像短边需要对齐该字段的值,长边会同比例 - # 的缩放,从而在保持图像长宽比例不变的情况下resize到新的尺寸。默认值为0。 - TARGET_SHORT_SIZE: 800 - # 类型:optional int - # 含义: 当使用RANGE_SCALING模式时,长边不能缩放到比该字段的值大。默认值为0。 - RESIZE_MAX_SIZE: 1333 - # 类型:required list - # 含义:图像进行归一化预处理时的均值 - MEAN: [104.008, 116.669, 122.675] - # 类型:required list - # 含义:图像进行归一化预处理时的方差 - STD: [1.0, 1.0, 1.0] - # 类型:string - # 含义:图片类型, rgb 或者 rgba - IMAGE_TYPE: "rgb" - # 类型:required int - # 含义:图像分类类型数 - NUM_CLASSES: 2 - # 类型:required int - # 含义:图片通道数 - CHANNELS : 3 - # 类型:required string - # 含义:预处理方式,目前提供图像检测的通用预处理类DetectionPreProcessor. 
- PRE_PROCESSOR: "DetectionPreProcessor" - # 类型:required string - # 含义:预测模式,支持 NATIVE 和 ANALYSIS - PREDICTOR_MODE: "ANALYSIS" - # 类型:required int - # 含义:每次预测的 batch_size - BATCH_SIZE : 3 - # 类型:optional int - # 含义: 输入张量的个数。大部分模型不需要设置。 默认值为1. - FEEDS_SIZE: 2 - # 类型: optional int - # 含义: 将图像的边变为该字段的值的整数倍。默认值为1。 - COARSEST_STRIDE: 32 -``` \ No newline at end of file diff --git a/PaddleCV/PaddleDetection/inference/docs/linux_build.md b/PaddleCV/PaddleDetection/inference/docs/linux_build.md deleted file mode 100644 index 2ad9e46383123efee47b941f97c8e7690c7b95d6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/docs/linux_build.md +++ /dev/null @@ -1,84 +0,0 @@ -# Linux平台 编译指南 - -## 说明 -本文档在 `Linux`平台使用`GCC 4.8.5` 和 `GCC 4.9.4`测试过,如果需要使用更高G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html#id15)。 - -## 前置条件 -* G++ 4.8.2 ~ 4.9.4 -* CUDA 8.0/ CUDA 9.0 -* CMake 3.0+ - -请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**。 - -### Step1: 下载代码 - -1. `mkdir -p /root/projects/paddle_models && cd /root/projects/paddle_models` -2. `git clone https://github.com/PaddlePaddle/models.git` - -`C++`预测代码在`/root/projects/paddle_models/models/PaddleCV/PaddleDetection/inference` 目录,该目录不依赖任何`PaddleDetection`下其他目录。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -目前仅支持`CUDA 8` 和 `CUDA 9`,请点击 [PaddlePaddle预测库下载地址](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html)下载对应的版本(develop版本)。 - - -下载并解压后`/root/projects/fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` - -### Step3: 安装配置OpenCV - -```shell -# 0. 切换到/root/projects目录 -cd /root/projects -# 1. 下载OpenCV3.4.6版本源代码 -wget -c https://paddleseg.bj.bcebos.com/inference/opencv-3.4.6.zip -# 2. 
解压 -unzip opencv-3.4.6.zip && cd opencv-3.4.6 -# 3. 创建build目录并编译, 这里安装到/usr/local/opencv3目录 -mkdir build && cd build -cmake .. -DCMAKE_INSTALL_PREFIX=/root/projects/opencv3 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_IPP=OFF -DBUILD_IPP_IW=OFF -DWITH_LAPACK=OFF -DWITH_EIGEN=OFF -DCMAKE_INSTALL_LIBDIR=lib64 -DWITH_ZLIB=ON -DBUILD_ZLIB=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_PNG=ON -DBUILD_PNG=ON -DWITH_TIFF=ON -DBUILD_TIFF=ON -make -j4 -make install -``` - -**注意:** 上述操作完成后,`opencv` 被安装在 `/root/projects/opencv3` 目录。 - -### Step4: 编译 - -`CMake`编译时,涉及到四个编译参数用于指定核心依赖库的路径, 他们的定义如下: - -| 参数名 | 含义 | -| ---- | ---- | -| CUDA_LIB | cuda的库路径 | -| CUDNN_LIB | cuDnn的库路径| -| OPENCV_DIR | OpenCV的安装路径, | -| PADDLE_DIR | Paddle预测库的路径 | - -执行下列操作时,**注意**把对应的参数改为你的上述依赖库实际路径: - -```shell -cd /root/projects/paddle_models/models/PaddleCV/PaddleDetection/inference - -mkdir build && cd build -cmake .. -DWITH_GPU=ON -DPADDLE_DIR=/root/projects/fluid_inference -DCUDA_LIB=/usr/local/cuda/lib64/ -DOPENCV_DIR=/root/projects/opencv3/ -DCUDNN_LIB=/usr/local/cuda/lib64/ -make -``` - - -### Step5: 预测及可视化 - -执行命令: - -``` -./detection_demo --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考ReadMe文档: [预测和可视化部分](../README.md) diff --git a/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md b/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md deleted file mode 100644 index be1c0289d404c17e561928173b104228ea63dbda..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/docs/windows_vs2015_build.md +++ /dev/null @@ -1,97 +0,0 @@ -# Windows平台使用 Visual Studio 2015 编译指南 - -本文档步骤,我们同时在`Visual Studio 2015` 和 `Visual Studio 2019 Community` 两个版本进行了测试,我们推荐使用[`Visual Studio 2019`直接编译`CMake`项目](./windows_vs2019_build.md)。 - - -## 前置条件 -* Visual Studio 2015 -* CUDA 8.0/ CUDA 9.0 -* CMake 3.0+ - -请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `D:\projects`演示**。 - -### Step1: 下载代码 - -1. 
打开`cmd`, 执行 `cd D:\projects\paddle_models` -2. `git clone https://github.com/PaddlePaddle/models.git` - -`C++`预测库代码在`D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference` 目录,该目录不依赖任何`PaddleDetection`下其他目录。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录 - -| CUDA | GPU | 下载地址 | -|------|------|--------| -| 8.0 | Yes | [fluid_inference.zip](https://bj.bcebos.com/v1/paddleseg/fluid_inference_win.zip) | -| 9.0 | Yes | [fluid_inference_cuda90.zip](https://paddleseg.bj.bcebos.com/fluid_inference_cuda9_cudnn7.zip) | - -解压后`D:\projects\fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` - -### Step3: 安装配置OpenCV - -1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) -2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 - - 我的电脑->属性->高级系统设置->环境变量 - - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` - -### Step4: 以VS2015为例编译代码 - -以下命令需根据自己系统中各相关依赖的路径进行修改 - -* 调用VS2015, 请根据实际VS安装路径进行调整,打开cmd命令行工具执行以下命令 -* 其他vs版本(比如vs2019),请查找到对应版本的`vcvarsall.bat`路径,替换本命令即可 - -``` -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -``` - -* CMAKE编译工程 - * PADDLE_DIR: fluid_inference预测库路径 - * CUDA_LIB: CUDA动态库目录, 请根据实际安装情况调整 - * OPENCV_DIR: OpenCV解压目录 - -``` -# 切换到预测库所在目录 -cd /d D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference -# 创建构建目录, 重新构建只需要删除该目录即可 -mkdir build -cd build -# cmake构建VS项目 -D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build> cmake .. 
-G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DCUDA_LIB=D:\projects\cudalib\v9.0\lib\x64 -DOPENCV_DIR=D:\projects\opencv -T host=x64 -``` - -这里的`cmake`参数`-G`, 表示生成对应的VS版本的工程,可以根据自己的`VS`版本调整,具体请参考[cmake文档](https://cmake.org/cmake/help/v3.15/manual/cmake-generators.7.html) - -* 生成可执行文件 - -``` -D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build> msbuild /m /p:Configuration=Release cpp_inference_demo.sln -``` - -### Step5: 预测及可视化 - -上述`Visual Studio 2015`编译产出的可执行文件在`build\release`目录下,切换到该目录: -``` -cd /d D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build\release -``` - -之后执行命令: - -``` -detection_demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考ReadMe文档: [预测和可视化部分](../README.md) - diff --git a/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md b/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md deleted file mode 100644 index f3f589a9a246e494439b26f516ea319c270ff9ab..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/docs/windows_vs2019_build.md +++ /dev/null @@ -1,102 +0,0 @@ -# Visual Studio 2019 Community CMake 编译指南 - -Windows 平台下,我们使用`Visual Studio 2015` 和 `Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。 - -你也可以使用和`VS2015`一样,通过把`CMake`项目转化成`VS`项目来编译,其中**有差别的部分**在文档中我们有说明,请参考:[使用Visual Studio 2015 编译指南](./windows_vs2015_build.md) - -## 前置条件 -* Visual Studio 2019 -* CUDA 8.0/ CUDA 9.0 -* CMake 3.0+ - -请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 - -**下面所有示例以工作目录为 `D:\projects`演示**。 - -### Step1: 下载代码 - -1. 点击下载源代码:[下载地址](https://github.com/PaddlePaddle/models/archive/develop.zip) -2. 
解压,解压后目录重命名为`paddle_models` - -以下代码目录路径为`D:\projects\paddle_models` 为例。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录 - -| CUDA | GPU | 下载地址 | -|------|------|--------| -| 8.0 | Yes | [fluid_inference.zip](https://bj.bcebos.com/v1/paddleseg/fluid_inference_win.zip) | -| 9.0 | Yes | [fluid_inference_cuda90.zip](https://paddleseg.bj.bcebos.com/fluid_inference_cuda9_cudnn7.zip) | - -解压后`D:\projects\fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` -**注意:** `CUDA90`版本解压后目录名称为`fluid_inference_cuda90`。 - -### Step3: 安装配置OpenCV - -1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) -2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 - - 我的电脑->属性->高级系统设置->环境变量 - - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` - -### Step4: 使用Visual Studio 2019直接编译CMake - -1. 打开Visual Studio 2019 Community,点击`继续但无需代码` -![step2](https://paddleseg.bj.bcebos.com/inference/vs2019_step1.png) -2. 点击: `文件`->`打开`->`CMake` -![step2.1](https://paddleseg.bj.bcebos.com/inference/vs2019_step2.png) - -选择项目代码所在路径,并打开`CMakeList.txt`: - -![step2.2](https://paddleseg.bj.bcebos.com/inference/vs2019_step3.png) - -3. 点击:`项目`->`cpp_inference_demo的CMake设置` - -![step3](https://paddleseg.bj.bcebos.com/inference/vs2019_step4.png) - -4. 点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径 - -![step4](https://paddleseg.bj.bcebos.com/inference/vs2019_step5.png) - -三个编译参数的含义说明如下: - -| 参数名 | 含义 | -| ---- | ---- | -| CUDA_LIB | cuda的库路径 | -| OPENCV_DIR | OpenCV的安装路径, | -| PADDLE_DIR | Paddle预测库的路径 | - -**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。 - -5. 
点击`生成`->`全部生成` - -![step6](https://paddleseg.bj.bcebos.com/inference/vs2019_step6.png) - - -### Step5: 预测及可视化 - -上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: - -``` -cd D:\projects\paddle_models\models\PaddleCV\PaddleDetection\inference\build\x64-Release -``` - -之后执行命令: - -``` -detection_demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考ReadMe文档: [预测和可视化部分](../README.md) - diff --git a/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake b/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake deleted file mode 100644 index 15fa2674e00d85f1db7bbdfdceeebadaf0eabf5a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/external-cmake/yaml-cpp.cmake +++ /dev/null @@ -1,29 +0,0 @@ - -find_package(Git REQUIRED) - -include(ExternalProject) - -message("${CMAKE_BUILD_TYPE}") - -ExternalProject_Add( - ext-yaml-cpp - GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git - GIT_TAG e0e01d53c27ffee6c86153fa41e7f5e57d3e5c90 - CMAKE_ARGS - -DYAML_CPP_BUILD_TESTS=OFF - -DYAML_CPP_BUILD_TOOLS=OFF - -DYAML_CPP_INSTALL=OFF - -DYAML_CPP_BUILD_CONTRIB=OFF - -DMSVC_SHARED_RT=OFF - -DBUILD_SHARED_LIBS=OFF - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib - -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib - PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" - # Disable install step - INSTALL_COMMAND "" - LOG_DOWNLOAD ON -) diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg deleted file mode 100644 index 0abbdab06eb5950b93908cc91adfa640e8a3ac78..0000000000000000000000000000000000000000 Binary files 
a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000014439.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg deleted file mode 100644 index 9f77f5d5f057b6f92dc096da704ecb8dee99bdf5..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000087038.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg b/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg deleted file mode 100644 index cb304bd56c4010c08611a30dcca58ea9140cea54..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/inference/images/detection_rcnn/000000570688.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp deleted file mode 100644 index ba07e3b6c7fb2152bd7825950a3cd94769f36adc..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.cpp +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "detection_predictor.h" -#include -#include -#include -#include "utils/detection_result.pb.h" - -namespace PaddleSolution { - /* lod_buffer: every item in lod_buffer is an image matrix after preprocessing - * input_buffer: same data with lod_buffer after flattening to 1-D vector and padding, needed to be empty before using this function - */ - void padding_minibatch(const std::vector> &lod_buffer, std::vector &input_buffer, - std::vector &resize_heights, std::vector &resize_widths, int channels, int coarsest_stride = 1) { - int batch_size = lod_buffer.size(); - int max_h = -1; - int max_w = -1; - for(int i = 0; i < batch_size; ++i) { - max_h = (max_h > resize_heights[i])? max_h:resize_heights[i]; - max_w = (max_w > resize_widths[i])? max_w:resize_widths[i]; - } - max_h = static_cast(ceil(static_cast(max_h) / static_cast(coarsest_stride)) * coarsest_stride); - max_w = static_cast(ceil(static_cast(max_w) / static_cast(coarsest_stride)) * coarsest_stride); - std::cout << "max_w: " << max_w << " max_h: " << max_h << std::endl; - input_buffer.insert(input_buffer.end(), batch_size * channels * max_h * max_w, 0); - // flatten tensor and padding - for(int i = 0; i < lod_buffer.size(); ++i) { - float *input_buffer_ptr = input_buffer.data() + i * channels * max_h * max_w; - const float *lod_ptr = lod_buffer[i].data(); - for(int c = 0; c < channels; ++c) { - for(int h = 0; h < resize_heights[i]; ++h) { - memcpy(input_buffer_ptr, lod_ptr, resize_widths[i] * sizeof(float)); - lod_ptr += resize_widths[i]; - input_buffer_ptr += max_w; - } - input_buffer_ptr += (max_h - resize_heights[i]) * max_w; - } - } - // change resize w, h - for(int i = 0; i < batch_size; ++i){ - resize_widths[i] = max_w; - resize_heights[i] = max_h; - } - } - - void output_detection_result(const float* out_addr, const std::vector> &lod_vector, const std::vector &imgs_batch){ - for(int i = 0; i < lod_vector[0].size() - 1; ++i) { - DetectionResult detection_result; - 
detection_result.set_filename(imgs_batch[i]); - std::cout << imgs_batch[i] << ":" << std::endl; - for (int j = lod_vector[0][i]; j < lod_vector[0][i+1]; ++j) { - DetectionBox *box_ptr = detection_result.add_detection_boxes(); - box_ptr->set_class_(static_cast(round(out_addr[0 + j * 6]))); - box_ptr->set_score(out_addr[1 + j * 6]); - box_ptr->set_left_top_x(out_addr[2 + j * 6]); - box_ptr->set_left_top_y(out_addr[3 + j * 6]); - box_ptr->set_right_bottom_x(out_addr[4 + j * 6]); - box_ptr->set_right_bottom_y(out_addr[5 + j * 6]); - printf("Class %d, score = %f, left top = [%f, %f], right bottom = [%f, %f]\n", - static_cast(round(out_addr[0 + j * 6])), out_addr[1 + j * 6], out_addr[2 + j * 6], - out_addr[3 + j * 6], out_addr[4 + j * 6], out_addr[5 + j * 6]); - } - printf("\n"); - std::ofstream output(imgs_batch[i] + ".pb", std::ios::out | std::ios::trunc | std::ios::binary); - detection_result.SerializeToOstream(&output); - output.close(); - } - } - - int DetectionPredictor::init(const std::string& conf) { - if (!_model_config.load_config(conf)) { - LOG(FATAL) << "Fail to load config file: [" << conf << "]"; - return -1; - } - _preprocessor = PaddleSolution::create_processor(conf); - if (_preprocessor == nullptr) { - LOG(FATAL) << "Failed to create_processor"; - return -1; - } - - bool use_gpu = _model_config._use_gpu; - const auto& model_dir = _model_config._model_path; - const auto& model_filename = _model_config._model_file_name; - const auto& params_filename = _model_config._param_file_name; - - // load paddle model file - if (_model_config._predictor_mode == "NATIVE") { - paddle::NativeConfig config; - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.prog_file = prog_file; - config.param_file = param_file; - config.fraction_of_gpu_memory = 0; - config.use_gpu = use_gpu; - config.device = 0; - _main_predictor = paddle::CreatePaddlePredictor(config); - } else if 
(_model_config._predictor_mode == "ANALYSIS") { - paddle::AnalysisConfig config; - if (use_gpu) { - config.EnableUseGpu(100, 0); - } - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.SetModel(prog_file, param_file); - config.SwitchUseFeedFetchOps(false); - config.SwitchSpecifyInputNames(true); - config.EnableMemoryOptim(); - _main_predictor = paddle::CreatePaddlePredictor(config); - } else { - return -1; - } - return 0; - - } - - int DetectionPredictor::predict(const std::vector& imgs) { - if (_model_config._predictor_mode == "NATIVE") { - return native_predict(imgs); - } - else if (_model_config._predictor_mode == "ANALYSIS") { - return analysis_predict(imgs); - } - return -1; - } - - int DetectionPredictor::native_predict(const std::vector& imgs) { - int config_batch_size = _model_config._batch_size; - - int channels = _model_config._channels; - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - std::size_t total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, (int)total_size); - int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0); - int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; - - auto& input_buffer = _buffer; - auto& imgs_batch = _imgs_batch; - float sr; - // DetectionResultsContainer result_container; - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } - - int real_buffer_size = batch_size * channels * eval_width * eval_height; - std::vector feeds; - input_buffer.clear(); - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } - std::vector ori_widths; - std::vector ori_heights; - std::vector resize_widths; - 
std::vector resize_heights; - std::vector scale_ratios; - ori_widths.resize(batch_size); - ori_heights.resize(batch_size); - resize_widths.resize(batch_size); - resize_heights.resize(batch_size); - scale_ratios.resize(batch_size); - std::vector> lod_buffer(batch_size); - if (!_preprocessor->batch_process(imgs_batch, lod_buffer, ori_widths.data(), ori_heights.data(), - resize_widths.data(), resize_heights.data(), scale_ratios.data())) { - return -1; - } - // flatten and padding - padding_minibatch(lod_buffer, input_buffer, resize_heights, resize_widths, channels, _model_config._coarsest_stride); - paddle::PaddleTensor im_tensor, im_size_tensor, im_info_tensor; - - im_tensor.name = "image"; - im_tensor.shape = std::vector({ batch_size, channels, resize_heights[0], resize_widths[0] }); - im_tensor.data.Reset(input_buffer.data(), input_buffer.size() * sizeof(float)); - im_tensor.dtype = paddle::PaddleDType::FLOAT32; - - std::vector image_infos; - for(int i = 0; i < batch_size; ++i) { - image_infos.push_back(resize_heights[i]); - image_infos.push_back(resize_widths[i]); - image_infos.push_back(scale_ratios[i]); - } - im_info_tensor.name = "info"; - im_info_tensor.shape = std::vector({batch_size, 3}); - im_info_tensor.data.Reset(image_infos.data(), batch_size * 3 * sizeof(float)); - im_info_tensor.dtype = paddle::PaddleDType::FLOAT32; - - std::vector image_size; - for(int i = 0; i < batch_size; ++i) { - image_size.push_back(ori_heights[i]); - image_size.push_back(ori_widths[i]); - } - - std::vector image_size_f; - for(int i = 0; i < batch_size; ++i) { - image_size_f.push_back(ori_heights[i]); - image_size_f.push_back(ori_widths[i]); - image_size_f.push_back(1.0); - } - - int feeds_size = _model_config._feeds_size; - im_size_tensor.name = "im_size"; - if(feeds_size == 2) { - im_size_tensor.shape = std::vector({ batch_size, 2}); - im_size_tensor.data.Reset(image_size.data(), batch_size * 2 * sizeof(int)); - im_size_tensor.dtype = paddle::PaddleDType::INT32; - } - else 
if(feeds_size == 3) { - im_size_tensor.shape = std::vector({ batch_size, 3}); - im_size_tensor.data.Reset(image_size_f.data(), batch_size * 3 * sizeof(float)); - im_size_tensor.dtype = paddle::PaddleDType::FLOAT32; - } - std::cout << "Feed size = " << feeds_size << std::endl; - feeds.push_back(im_tensor); - if(_model_config._feeds_size > 2) { - feeds.push_back(im_info_tensor); - } - feeds.push_back(im_size_tensor); - _outputs.clear(); - - auto t1 = std::chrono::high_resolution_clock::now(); - if (!_main_predictor->Run(feeds, &_outputs, batch_size)) { - LOG(ERROR) << "Failed: NativePredictor->Run() return false at batch: " << u; - continue; - } - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; - std::cout << "Number of outputs:" << _outputs.size() << std::endl; - int out_num = 1; - // print shape of first output tensor for debugging - std::cout << "size of outputs[" << 0 << "]: ("; - for (int j = 0; j < _outputs[0].shape.size(); ++j) { - out_num *= _outputs[0].shape[j]; - std::cout << _outputs[0].shape[j] << ","; - } - std::cout << ")" << std::endl; - - // const size_t nums = _outputs.front().data.length() / sizeof(float); - // if (out_num % batch_size != 0 || out_num != nums) { - // LOG(ERROR) << "outputs data size mismatch with shape size."; - // return -1; - // } - float* out_addr = (float *)(_outputs[0].data.data()); - output_detection_result(out_addr, _outputs[0].lod, imgs_batch); - } - return 0; - } - - int DetectionPredictor::analysis_predict(const std::vector& imgs) { - - int config_batch_size = _model_config._batch_size; - int channels = _model_config._channels; - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - auto total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, (int)total_size); - int batch = total_size / default_batch_size + ((total_size % default_batch_size) 
!= 0); - int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; - - auto& input_buffer = _buffer; - auto& imgs_batch = _imgs_batch; - //DetectionResultsContainer result_container; - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } - - int real_buffer_size = batch_size * channels * eval_width * eval_height; - std::vector feeds; - //input_buffer.resize(real_buffer_size); - input_buffer.clear(); - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } - - std::vector ori_widths; - std::vector ori_heights; - std::vector resize_widths; - std::vector resize_heights; - std::vector scale_ratios; - ori_widths.resize(batch_size); - ori_heights.resize(batch_size); - resize_widths.resize(batch_size); - resize_heights.resize(batch_size); - scale_ratios.resize(batch_size); - - std::vector> lod_buffer(batch_size); - if (!_preprocessor->batch_process(imgs_batch, lod_buffer, ori_widths.data(), ori_heights.data(), - resize_widths.data(), resize_heights.data(), scale_ratios.data())){ - std::cout << "Failed to preprocess!" 
<< std::endl; - return -1; - } - - //flatten tensor - padding_minibatch(lod_buffer, input_buffer, resize_heights, resize_widths, channels, _model_config._coarsest_stride); - - std::vector input_names = _main_predictor->GetInputNames(); - auto im_tensor = _main_predictor->GetInputTensor(input_names.front()); - im_tensor->Reshape({ batch_size, channels, resize_heights[0], resize_widths[0] }); - im_tensor->copy_from_cpu(input_buffer.data()); - - if(input_names.size() > 2){ - std::vector image_infos; - for(int i = 0; i < batch_size; ++i) { - image_infos.push_back(resize_heights[i]); - image_infos.push_back(resize_widths[i]); - image_infos.push_back(scale_ratios[i]); - } - auto im_info_tensor = _main_predictor->GetInputTensor(input_names[1]); - im_info_tensor->Reshape({batch_size, 3}); - im_info_tensor->copy_from_cpu(image_infos.data()); - } - - std::vector image_size; - for(int i = 0; i < batch_size; ++i) { - image_size.push_back(ori_heights[i]); - image_size.push_back(ori_widths[i]); - } - std::vector image_size_f; - for(int i = 0; i < batch_size; ++i) { - image_size_f.push_back(static_cast(ori_heights[i])); - image_size_f.push_back(static_cast(ori_widths[i])); - image_size_f.push_back(1.0); - } - - auto im_size_tensor = _main_predictor->GetInputTensor(input_names.back()); - if(input_names.size() > 2) { - im_size_tensor->Reshape({batch_size, 3}); - im_size_tensor->copy_from_cpu(image_size_f.data()); - } - else{ - im_size_tensor->Reshape({batch_size, 2}); - im_size_tensor->copy_from_cpu(image_size.data()); - } - - - auto t1 = std::chrono::high_resolution_clock::now(); - _main_predictor->ZeroCopyRun(); - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; - - auto output_names = _main_predictor->GetOutputNames(); - auto output_t = _main_predictor->GetOutputTensor(output_names[0]); - std::vector out_data; - std::vector output_shape = output_t->shape(); - 
- int out_num = 1; - std::cout << "size of outputs[" << 0 << "]: ("; - for (int j = 0; j < output_shape.size(); ++j) { - out_num *= output_shape[j]; - std::cout << output_shape[j] << ","; - } - std::cout << ")" << std::endl; - - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - - float* out_addr = (float *)(out_data.data()); - auto lod_vector = output_t->lod(); - output_detection_result(out_addr, lod_vector, imgs_batch); - } - return 0; - } -} diff --git a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h b/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h deleted file mode 100644 index 3bc4cfdd793291d7d89342c7fbccfdd558d1f004..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/predictor/detection_predictor.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace PaddleSolution { - class DetectionPredictor { - public: - // init a predictor with a yaml config file - int init(const std::string& conf); - // predict api - int predict(const std::vector& imgs); - - private: - int native_predict(const std::vector& imgs); - int analysis_predict(const std::vector& imgs); - private: - std::vector _buffer; - std::vector _imgs_batch; - std::vector _outputs; - - PaddleSolution::PaddleModelConfigPaser _model_config; - std::shared_ptr _preprocessor; - std::unique_ptr _main_predictor; - }; -} diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp deleted file mode 100644 index dbe7bcf624b649c02297bddd593d173b57550f17..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -#include "preprocessor.h" -#include "preprocessor_detection.h" - -namespace PaddleSolution { - - std::shared_ptr create_processor(const std::string& conf_file) { - - auto config = std::make_shared(); - if (!config->load_config(conf_file)) { - LOG(FATAL) << "fail to laod conf file [" << conf_file << "]"; - return nullptr; - } - - if (config->_pre_processor == "DetectionPreProcessor") { - auto p = std::make_shared(); - if (!p->init(config)) { - return nullptr; - } - return p; - } - - - LOG(FATAL) << "unknown processor_name [" << config->_pre_processor << "]"; - - return nullptr; - } -} diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h deleted file mode 100644 index a3fb2e029c8acf92010a258dd2824b85a0f2f90f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include -#include -#include - -#include -#include -#include - -#include "utils/conf_parser.h" - -namespace PaddleSolution { - -class ImagePreProcessor { -protected: - ImagePreProcessor() {}; - -public: - virtual ~ImagePreProcessor() {} - - virtual bool single_process(const std::string& fname, float* data, int* ori_w, int* ori_h) { - return true; - } - - virtual bool batch_process(const std::vector& imgs, float* data, int* ori_w, int* ori_h) { - return true; - } - - virtual bool single_process(const std::string& fname, float* data) { - return true; - } - - virtual bool batch_process(const std::vector& imgs, float* data) { - return true; - } - - virtual bool single_process(const std::string& fname, std::vector &data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio) { - return true; - } - - virtual bool batch_process(const std::vector& imgs, std::vector> &data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio) { - return true; - } - -}; // end of class ImagePreProcessor - -std::shared_ptr create_processor(const std::string &config_file); - -} // end of namespace paddle_solution - diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp deleted file mode 100644 index ba8fd0e328c5a859e2d4b88adba0e56e5e3a7476..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include - -#include "preprocessor_detection.h" -#include "utils/utils.h" - -namespace PaddleSolution { - bool DetectionPreProcessor::single_process(const std::string& fname, std::vector &vec_data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio) { - cv::Mat im1 = cv::imread(fname, -1); - cv::Mat im; - if(_config->_feeds_size == 3) { // faster rcnn - im1.convertTo(im, CV_32FC3, 1/255.0); - } - else if(_config->_feeds_size == 2){ //yolo v3 - im = im1; - } - if (im.data == nullptr || im.empty()) { - LOG(ERROR) << "Failed to open image: " << fname; - return false; - } - - int channels = im.channels(); - if (channels == 1) { - cv::cvtColor(im, im, cv::COLOR_GRAY2BGR); - } - channels = im.channels(); - if (channels != 3 && channels != 4) { - LOG(ERROR) << "Only support rgb(gray) and rgba image."; - return false; - } - *ori_w = im.cols; - *ori_h = im.rows; - cv::cvtColor(im, im, cv::COLOR_BGR2RGB); - //channels = im.channels(); - - //resize - int rw = im.cols; - int rh = im.rows; - float im_scale_ratio; - utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0], _config->_resize[1], _config->_target_short_size, _config->_resize_max_size, im_scale_ratio); - cv::Size resize_size(rw, rh); - *resize_w = rw; - *resize_h = rh; - *scale_ratio = im_scale_ratio; - if (*ori_h != rh || *ori_w != rw) { - cv::Mat im_temp; - if(_config->_resize_type == utils::SCALE_TYPE::UNPADDING) { - cv::resize(im, im_temp, resize_size, 0, 0, cv::INTER_LINEAR); - } - else if(_config->_resize_type == 
utils::SCALE_TYPE::RANGE_SCALING) { - cv::resize(im, im_temp, cv::Size(), im_scale_ratio, im_scale_ratio, cv::INTER_LINEAR); - } - im = im_temp; - } - - vec_data.resize(channels * rw * rh); - float *data = vec_data.data(); - - float* pmean = _config->_mean.data(); - float* pscale = _config->_std.data(); - for (int h = 0; h < rh; ++h) { - const uchar* uptr = im.ptr(h); - const float* fptr = im.ptr(h); - int im_index = 0; - for (int w = 0; w < rw; ++w) { - for (int c = 0; c < channels; ++c) { - int top_index = (c * rh + h) * rw + w; - float pixel;// = static_cast(fptr[im_index]);// / 255.0; - if(_config->_feeds_size == 2){ //yolo v3 - pixel = static_cast(uptr[im_index++]) / 255.0; - } - else if(_config->_feeds_size == 3){ - pixel = fptr[im_index++]; - } - pixel = (pixel - pmean[c]) / pscale[c]; - data[top_index] = pixel; - } - } - } - return true; - } - - bool DetectionPreProcessor::batch_process(const std::vector& imgs, std::vector> &data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio) { - auto ic = _config->_channels; - auto iw = _config->_resize[0]; - auto ih = _config->_resize[1]; - std::vector threads; - for (int i = 0; i < imgs.size(); ++i) { - std::string path = imgs[i]; - int* width = &ori_w[i]; - int* height = &ori_h[i]; - int* resize_width = &resize_w[i]; - int* resize_height = &resize_h[i]; - float* sr = &scale_ratio[i]; - threads.emplace_back([this, &data, i, path, width, height, resize_width, resize_height, sr] { - std::vector buffer; - single_process(path, buffer, width, height, resize_width, resize_height, sr); - data[i] = buffer; - }); - } - for (auto& t : threads) { - if (t.joinable()) { - t.join(); - } - } - return true; - } - - bool DetectionPreProcessor::init(std::shared_ptr config) { - _config = config; - return true; - } - -} diff --git a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h b/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h deleted file mode 100644 index 
731329040423756151a2590d3ed0f46b2800191d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/preprocessor/preprocessor_detection.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "preprocessor.h" - -namespace PaddleSolution { - - class DetectionPreProcessor : public ImagePreProcessor { - - public: - DetectionPreProcessor() : _config(nullptr) { - }; - - bool init(std::shared_ptr config); - - bool single_process(const std::string& fname, std::vector &data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio); - - bool batch_process(const std::vector& imgs, std::vector> &data, int* ori_w, int* ori_h, int* resize_w, int* resize_h, float* scale_ratio); - private: - std::shared_ptr _config; - }; - -} diff --git a/PaddleCV/PaddleDetection/inference/tools/coco17.json b/PaddleCV/PaddleDetection/inference/tools/coco17.json deleted file mode 100644 index d3bbbaad038534baacf6f86f78db5d32bce16238..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/tools/coco17.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "0" : "background", - "1" : "person", - "2" : "bicycle", - "3" : "car", - "4" : "motorcycle", - "5" : "airplane", - "6" : "bus", - "7" : "train", - "8" : "truck", - "9" : "boat", - "10" : "traffic light", - "11" : "fire hydrant", - "12" : "stop sign", - "13" : "parking 
meter", - "14" : "bench", - "15" : "bird", - "16" : "cat", - "17" : "dog", - "18" : "horse", - "19" : "sheep", - "20" : "cow", - "21" : "elephant", - "22" : "bear", - "23" : "zebra", - "24" : "giraffe", - "25" : "backpack", - "26" : "umbrella", - "27" : "handbag", - "28" : "tie", - "29" : "suitcase", - "30" : "frisbee", - "31" : "skis", - "32" : "snowboard", - "33" : "sports ball", - "34" : "kite", - "35" : "baseball bat", - "36" : "baseball glove", - "37" : "skateboard", - "38" : "surfboard", - "39" : "tennis racket", - "40" : "bottle", - "41" : "wine glass", - "42" : "cup", - "43" : "fork", - "44" : "knife", - "45" : "spoon", - "46" : "bowl", - "47" : "banana", - "48" : "apple", - "49" : "sandwich", - "50" : "orange", - "51" : "broccoli", - "52" : "carrot", - "53" : "hot dog", - "54" : "pizza", - "55" : "donut", - "56" : "cake", - "57" : "chair", - "58" : "couch", - "59" : "potted plant", - "60" : "bed", - "61" : "dining table", - "62" : "toilet", - "63" : "tv", - "64" : "laptop", - "65" : "mouse", - "66" : "remote", - "67" : "keyboard", - "68" : "cell phone", - "69" : "microwave", - "70" : "oven", - "71" : "toaster", - "72" : "sink", - "73" : "refrigerator", - "74" : "book", - "75" : "clock", - "76" : "vase", - "77" : "scissors", - "78" : "teddy bear", - "79" : "hair drier", - "80" : "toothbrush" -} diff --git a/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py b/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py deleted file mode 100644 index 3dc66f368b6fea72f70d6a5685b19f23e8021d51..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/tools/detection_result_pb2.py +++ /dev/null @@ -1,151 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: detection_result.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='detection_result.proto', - package='PaddleSolution', - syntax='proto2', - serialized_pb=_b('\n\x16\x64\x65tection_result.proto\x12\x0ePaddleSolution\"\x84\x01\n\x0c\x44\x65tectionBox\x12\r\n\x05\x63lass\x18\x01 \x01(\x05\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x12\n\nleft_top_x\x18\x03 \x01(\x02\x12\x12\n\nleft_top_y\x18\x04 \x01(\x02\x12\x16\n\x0eright_bottom_x\x18\x05 \x01(\x02\x12\x16\n\x0eright_bottom_y\x18\x06 \x01(\x02\"Z\n\x0f\x44\x65tectionResult\x12\x10\n\x08\x66ilename\x18\x01 \x01(\t\x12\x35\n\x0f\x64\x65tection_boxes\x18\x02 \x03(\x0b\x32\x1c.PaddleSolution.DetectionBox') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_DETECTIONBOX = _descriptor.Descriptor( - name='DetectionBox', - full_name='PaddleSolution.DetectionBox', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='class', full_name='PaddleSolution.DetectionBox.class', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='score', full_name='PaddleSolution.DetectionBox.score', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='left_top_x', full_name='PaddleSolution.DetectionBox.left_top_x', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='left_top_y', full_name='PaddleSolution.DetectionBox.left_top_y', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='right_bottom_x', full_name='PaddleSolution.DetectionBox.right_bottom_x', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='right_bottom_y', full_name='PaddleSolution.DetectionBox.right_bottom_y', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=43, - serialized_end=175, -) - - -_DETECTIONRESULT = _descriptor.Descriptor( - name='DetectionResult', - full_name='PaddleSolution.DetectionResult', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='filename', full_name='PaddleSolution.DetectionResult.filename', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='detection_boxes', full_name='PaddleSolution.DetectionResult.detection_boxes', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=177, - serialized_end=267, -) - -_DETECTIONRESULT.fields_by_name['detection_boxes'].message_type = _DETECTIONBOX -DESCRIPTOR.message_types_by_name['DetectionBox'] = _DETECTIONBOX -DESCRIPTOR.message_types_by_name['DetectionResult'] = _DETECTIONRESULT - -DetectionBox = _reflection.GeneratedProtocolMessageType('DetectionBox', (_message.Message,), dict( - DESCRIPTOR = _DETECTIONBOX, - __module__ = 'detection_result_pb2' - # @@protoc_insertion_point(class_scope:PaddleSolution.DetectionBox) - )) -_sym_db.RegisterMessage(DetectionBox) - -DetectionResult = _reflection.GeneratedProtocolMessageType('DetectionResult', (_message.Message,), dict( - DESCRIPTOR = _DETECTIONRESULT, - __module__ = 'detection_result_pb2' - # @@protoc_insertion_point(class_scope:PaddleSolution.DetectionResult) - )) -_sym_db.RegisterMessage(DetectionResult) - - -# @@protoc_insertion_point(module_scope) diff --git a/PaddleCV/PaddleDetection/inference/tools/vis.py b/PaddleCV/PaddleDetection/inference/tools/vis.py deleted file mode 100644 index 1ca13bfbaf48669a78bf94344d378c37fe071f1a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/tools/vis.py +++ /dev/null @@ -1,104 +0,0 @@ -# coding: utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import detection_result_pb2 -import cv2 -import sys -import gflags -import numpy as np -import json -from PIL import Image, ImageDraw, ImageFont - -Flags = gflags.FLAGS -gflags.DEFINE_string('img_path', 'abc', 'image path') -gflags.DEFINE_string('img_result_path', 'def', 'image result path') -gflags.DEFINE_float('threshold', 0.0, 'threshold of score') -gflags.DEFINE_string('c2l_path', 'ghk', 'class to label path') - -def colormap(rgb=False): - """ - Get colormap - """ - color_list = np.array([ - 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, - 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, - 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, - 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, - 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, - 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, - 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, - 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, - 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, - 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, - 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, - 
1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, - 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, - 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, - 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, - 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 - ]).astype(np.float32) - color_list = color_list.reshape((-1, 3)) * 255 - if not rgb: - color_list = color_list[:, ::-1] - return color_list - -if __name__ == "__main__": - if len(sys.argv) != 5: - print("Usage: python vis.py --img_path=/path/to/image --img_result_path=/path/to/image_result.pb --threshold=0.1 --c2l_path=/path/to/class2label.json") - else: - Flags(sys.argv) - color_list = colormap(rgb=True) - text_thickness = 1 - text_scale = 0.3 - with open(Flags.img_result_path, "rb") as f: - detection_result = detection_result_pb2.DetectionResult() - detection_result.ParseFromString(f.read()) - img = cv2.imread(Flags.img_path) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - class2LabelMap = dict() - with open(Flags.c2l_path, "r", encoding="utf-8") as json_f: - class2LabelMap = json.load(json_f) - for box in detection_result.detection_boxes: - if box.score >= Flags.threshold: - box_class = getattr(box, 'class') - text_class_score_str = "%s %.2f" % (class2LabelMap.get(str(box_class)), box.score) - text_point = (int(box.left_top_x), int(box.left_top_y)) - - ptLeftTop = (int(box.left_top_x), int(box.left_top_y)) - ptRightBottom = (int(box.right_bottom_x), int(box.right_bottom_y)) - box_thickness = 1 - color = tuple([int(c) for c in color_list[box_class]]) - cv2.rectangle(img, ptLeftTop, ptRightBottom, color, box_thickness, 8) - if text_point[1] < 0: - 
text_point = (int(box.left_top_x), int(box.right_bottom_y)) - WHITE = (255, 255, 255) - font = cv2.FONT_HERSHEY_SIMPLEX - text_size = cv2.getTextSize(text_class_score_str, font, text_scale, text_thickness) - - text_box_left_top = (text_point[0], text_point[1] - text_size[0][1]) - text_box_right_bottom = (text_point[0] + text_size[0][0], text_point[1]) - - cv2.rectangle(img, text_box_left_top, text_box_right_bottom, color, -1, 8) - cv2.putText(img, text_class_score_str, text_point, font, text_scale, WHITE, text_thickness) - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - cv2.imwrite(Flags.img_path + ".png", img) diff --git a/PaddleCV/PaddleDetection/inference/utils/conf_parser.h b/PaddleCV/PaddleDetection/inference/utils/conf_parser.h deleted file mode 100644 index 21944d032b2c24cdb584dc076a696560d4665ea1..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/utils/conf_parser.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once -#include -#include -#include -#include - -#include -namespace PaddleSolution { - - class PaddleModelConfigPaser { - std::map _scaling_map; - public: - PaddleModelConfigPaser() - :_class_num(0), - _channels(0), - _use_gpu(0), - _batch_size(1), - _target_short_size(0), - _model_file_name("__model__"), - _param_file_name("__params__"), - _scaling_map{{"UNPADDING", 0}, - {"RANGE_SCALING",1}}, - _feeds_size(1), - _coarsest_stride(1) - { - } - ~PaddleModelConfigPaser() { - } - - void reset() { - _crop_size.clear(); - _resize.clear(); - _mean.clear(); - _std.clear(); - _img_type.clear(); - _class_num = 0; - _channels = 0; - _use_gpu = 0; - _target_short_size = 0; - _batch_size = 1; - _model_file_name = "__model__"; - _model_path = "./"; - _param_file_name="__params__"; - _resize_type = 0; - _resize_max_size = 0; - _feeds_size = 1; - _coarsest_stride = 1; - } - - std::string process_parenthesis(const std::string& str) { - if (str.size() < 2) { - return str; - } - std::string nstr(str); - if (str[0] == '(' && str.back() == ')') { - nstr[0] = '['; - nstr[str.size() - 1] = ']'; - } - return nstr; - } - - template - std::vector parse_str_to_vec(const std::string& str) { - std::vector data; - auto node = YAML::Load(str); - for (const auto& item : node) { - data.push_back(item.as()); - } - return data; - } - - bool load_config(const std::string& conf_file) { - - reset(); - - YAML::Node config = YAML::LoadFile(conf_file); - // 1. get resize - auto str = config["DEPLOY"]["EVAL_CROP_SIZE"].as(); - _resize = parse_str_to_vec(process_parenthesis(str)); - - // 0. get crop_size - if(config["DEPLOY"]["CROP_SIZE"].IsDefined()) { - auto crop_str = config["DEPLOY"]["CROP_SIZE"].as(); - _crop_size = parse_str_to_vec(process_parenthesis(crop_str)); - } - else { - _crop_size = _resize; - } - - // 2. get mean - for (const auto& item : config["DEPLOY"]["MEAN"]) { - _mean.push_back(item.as()); - } - - // 3. 
get std - for (const auto& item : config["DEPLOY"]["STD"]) { - _std.push_back(item.as()); - } - - // 4. get image type - _img_type = config["DEPLOY"]["IMAGE_TYPE"].as(); - // 5. get class number - _class_num = config["DEPLOY"]["NUM_CLASSES"].as(); - // 7. set model path - _model_path = config["DEPLOY"]["MODEL_PATH"].as(); - // 8. get model file_name - _model_file_name = config["DEPLOY"]["MODEL_FILENAME"].as(); - // 9. get model param file name - _param_file_name = config["DEPLOY"]["PARAMS_FILENAME"].as(); - // 10. get pre_processor - _pre_processor = config["DEPLOY"]["PRE_PROCESSOR"].as(); - // 11. use_gpu - _use_gpu = config["DEPLOY"]["USE_GPU"].as(); - // 12. predictor_mode - _predictor_mode = config["DEPLOY"]["PREDICTOR_MODE"].as(); - // 13. batch_size - _batch_size = config["DEPLOY"]["BATCH_SIZE"].as(); - // 14. channels - _channels = config["DEPLOY"]["CHANNELS"].as(); - // 15. target_short_size - if(config["DEPLOY"]["TARGET_SHORT_SIZE"].IsDefined()) { - _target_short_size = config["DEPLOY"]["TARGET_SHORT_SIZE"].as(); - } - // 16.resize_type - if(config["DEPLOY"]["RESIZE_TYPE"].IsDefined() && - _scaling_map.find(config["DEPLOY"]["RESIZE_TYPE"].as()) != _scaling_map.end()) { - _resize_type = _scaling_map[config["DEPLOY"]["RESIZE_TYPE"].as()]; - } - else{ - _resize_type = 0; - } - // 17.resize_max_size - if(config["DEPLOY"]["RESIZE_MAX_SIZE"].IsDefined()) { - _resize_max_size = config["DEPLOY"]["RESIZE_MAX_SIZE"].as(); - } - // 18.feeds_size - if(config["DEPLOY"]["FEEDS_SIZE"].IsDefined()){ - _feeds_size = config["DEPLOY"]["FEEDS_SIZE"].as(); - } - // 19. 
coarsest_stride - if(config["DEPLOY"]["COARSEST_STRIDE"].IsDefined()) { - _coarsest_stride = config["DEPLOY"]["COARSEST_STRIDE"].as(); - } - return true; - } - - void debug() const { - - std::cout << "SCALE_RESIZE: (" << _resize[0] << ", " << _resize[1] << ")" << std::endl; - - std::cout << "MEAN: ["; - for (int i = 0; i < _mean.size(); ++i) { - if (i != _mean.size() - 1) { - std::cout << _mean[i] << ", "; - } else { - std::cout << _mean[i]; - } - } - std::cout << "]" << std::endl; - - std::cout << "STD: ["; - for (int i = 0; i < _std.size(); ++i) { - if (i != _std.size() - 1) { - std::cout << _std[i] << ", "; - } - else { - std::cout << _std[i]; - } - } - std::cout << "]" << std::endl; - std::cout << "DEPLOY.TARGET_SHORT_SIZE: " << _target_short_size << std::endl; - std::cout << "DEPLOY.IMAGE_TYPE: " << _img_type << std::endl; - std::cout << "DEPLOY.NUM_CLASSES: " << _class_num << std::endl; - std::cout << "DEPLOY.CHANNELS: " << _channels << std::endl; - std::cout << "DEPLOY.MODEL_PATH: " << _model_path << std::endl; - std::cout << "DEPLOY.MODEL_FILENAME: " << _model_file_name << std::endl; - std::cout << "DEPLOY.PARAMS_FILENAME: " << _param_file_name << std::endl; - std::cout << "DEPLOY.PRE_PROCESSOR: " << _pre_processor << std::endl; - std::cout << "DEPLOY.USE_GPU: " << _use_gpu << std::endl; - std::cout << "DEPLOY.PREDICTOR_MODE: " << _predictor_mode << std::endl; - std::cout << "DEPLOY.BATCH_SIZE: " << _batch_size << std::endl; - } - //DEPLOY.COARSEST_STRIDE - int _coarsest_stride; - // DEPLOY.FEEDS_SIZE - int _feeds_size; - // DEPLOY.RESIZE_TYPE 0:unpadding 1:rangescaling Default:0 - int _resize_type; - // DEPLOY.RESIZE_MAX_SIZE - int _resize_max_size; - // DEPLOY.CROP_SIZE - std::vector _crop_size; - // DEPLOY.SCALE_RESIZE - std::vector _resize; - // DEPLOY.MEAN - std::vector _mean; - // DEPLOY.STD - std::vector _std; - // DEPLOY.IMAGE_TYPE - std::string _img_type; - // DEPLOY.TARGET_SHORT_SIZE - int _target_short_size; - // DEPLOY.NUM_CLASSES - int 
_class_num; - // DEPLOY.CHANNELS - int _channels; - // DEPLOY.MODEL_PATH - std::string _model_path; - // DEPLOY.MODEL_FILENAME - std::string _model_file_name; - // DEPLOY.PARAMS_FILENAME - std::string _param_file_name; - // DEPLOY.PRE_PROCESSOR - std::string _pre_processor; - // DEPLOY.USE_GPU - int _use_gpu; - // DEPLOY.PREDICTOR_MODE - std::string _predictor_mode; - // DEPLOY.BATCH_SIZE - int _batch_size; - }; - -} diff --git a/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.cc b/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.cc deleted file mode 100644 index b5cce7317914cf93f99d0d4efa3aee763972cc4e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.cc +++ /dev/null @@ -1,1159 +0,0 @@ -// Generated by the protocol buffer compiler. DO NOT EDIT! -// source: detection_result.proto - -#define INTERNAL_SUPPRESS_PROTOBUF_FIELD_DEPRECATION -#include "detection_result.pb.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -// @@protoc_insertion_point(includes) - -namespace PaddleSolution { - -namespace { - -const ::google::protobuf::Descriptor* DetectionBox_descriptor_ = NULL; -const ::google::protobuf::internal::GeneratedMessageReflection* - DetectionBox_reflection_ = NULL; -const ::google::protobuf::Descriptor* DetectionResult_descriptor_ = NULL; -const ::google::protobuf::internal::GeneratedMessageReflection* - DetectionResult_reflection_ = NULL; - -} // namespace - - -void protobuf_AssignDesc_detection_5fresult_2eproto() GOOGLE_ATTRIBUTE_COLD; -void protobuf_AssignDesc_detection_5fresult_2eproto() { - protobuf_AddDesc_detection_5fresult_2eproto(); - const ::google::protobuf::FileDescriptor* file = - ::google::protobuf::DescriptorPool::generated_pool()->FindFileByName( - "detection_result.proto"); - GOOGLE_CHECK(file != NULL); - DetectionBox_descriptor_ = file->message_type(0); - static const int DetectionBox_offsets_[6] = { - 
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, class__), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, score_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, left_top_x_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, left_top_y_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, right_bottom_x_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, right_bottom_y_), - }; - DetectionBox_reflection_ = - ::google::protobuf::internal::GeneratedMessageReflection::NewGeneratedMessageReflection( - DetectionBox_descriptor_, - DetectionBox::internal_default_instance(), - DetectionBox_offsets_, - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, _has_bits_), - -1, - -1, - sizeof(DetectionBox), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionBox, _internal_metadata_)); - DetectionResult_descriptor_ = file->message_type(1); - static const int DetectionResult_offsets_[2] = { - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionResult, filename_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionResult, detection_boxes_), - }; - DetectionResult_reflection_ = - ::google::protobuf::internal::GeneratedMessageReflection::NewGeneratedMessageReflection( - DetectionResult_descriptor_, - DetectionResult::internal_default_instance(), - DetectionResult_offsets_, - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionResult, _has_bits_), - -1, - -1, - sizeof(DetectionResult), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DetectionResult, _internal_metadata_)); -} - -namespace { - -GOOGLE_PROTOBUF_DECLARE_ONCE(protobuf_AssignDescriptors_once_); -void protobuf_AssignDescriptorsOnce() { - ::google::protobuf::GoogleOnceInit(&protobuf_AssignDescriptors_once_, - &protobuf_AssignDesc_detection_5fresult_2eproto); -} - -void protobuf_RegisterTypes(const ::std::string&) GOOGLE_ATTRIBUTE_COLD; -void protobuf_RegisterTypes(const ::std::string&) { - 
protobuf_AssignDescriptorsOnce(); - ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( - DetectionBox_descriptor_, DetectionBox::internal_default_instance()); - ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( - DetectionResult_descriptor_, DetectionResult::internal_default_instance()); -} - -} // namespace - -void protobuf_ShutdownFile_detection_5fresult_2eproto() { - DetectionBox_default_instance_.Shutdown(); - delete DetectionBox_reflection_; - DetectionResult_default_instance_.Shutdown(); - delete DetectionResult_reflection_; -} - -void protobuf_InitDefaults_detection_5fresult_2eproto_impl() { - GOOGLE_PROTOBUF_VERIFY_VERSION; - - DetectionBox_default_instance_.DefaultConstruct(); - ::google::protobuf::internal::GetEmptyString(); - DetectionResult_default_instance_.DefaultConstruct(); - DetectionBox_default_instance_.get_mutable()->InitAsDefaultInstance(); - DetectionResult_default_instance_.get_mutable()->InitAsDefaultInstance(); -} - -GOOGLE_PROTOBUF_DECLARE_ONCE(protobuf_InitDefaults_detection_5fresult_2eproto_once_); -void protobuf_InitDefaults_detection_5fresult_2eproto() { - ::google::protobuf::GoogleOnceInit(&protobuf_InitDefaults_detection_5fresult_2eproto_once_, - &protobuf_InitDefaults_detection_5fresult_2eproto_impl); -} -void protobuf_AddDesc_detection_5fresult_2eproto_impl() { - GOOGLE_PROTOBUF_VERIFY_VERSION; - - protobuf_InitDefaults_detection_5fresult_2eproto(); - ::google::protobuf::DescriptorPool::InternalAddGeneratedFile( - "\n\026detection_result.proto\022\016PaddleSolution" - "\"\204\001\n\014DetectionBox\022\r\n\005class\030\001 \001(\005\022\r\n\005scor" - "e\030\002 \001(\002\022\022\n\nleft_top_x\030\003 \001(\002\022\022\n\nleft_top_" - "y\030\004 \001(\002\022\026\n\016right_bottom_x\030\005 \001(\002\022\026\n\016right" - "_bottom_y\030\006 \001(\002\"Z\n\017DetectionResult\022\020\n\010fi" - "lename\030\001 \001(\t\0225\n\017detection_boxes\030\002 \003(\0132\034." 
- "PaddleSolution.DetectionBox", 267); - ::google::protobuf::MessageFactory::InternalRegisterGeneratedFile( - "detection_result.proto", &protobuf_RegisterTypes); - ::google::protobuf::internal::OnShutdown(&protobuf_ShutdownFile_detection_5fresult_2eproto); -} - -GOOGLE_PROTOBUF_DECLARE_ONCE(protobuf_AddDesc_detection_5fresult_2eproto_once_); -void protobuf_AddDesc_detection_5fresult_2eproto() { - ::google::protobuf::GoogleOnceInit(&protobuf_AddDesc_detection_5fresult_2eproto_once_, - &protobuf_AddDesc_detection_5fresult_2eproto_impl); -} -// Force AddDescriptors() to be called at static initialization time. -struct StaticDescriptorInitializer_detection_5fresult_2eproto { - StaticDescriptorInitializer_detection_5fresult_2eproto() { - protobuf_AddDesc_detection_5fresult_2eproto(); - } -} static_descriptor_initializer_detection_5fresult_2eproto_; - -namespace { - -static void MergeFromFail(int line) GOOGLE_ATTRIBUTE_COLD GOOGLE_ATTRIBUTE_NORETURN; -static void MergeFromFail(int line) { - ::google::protobuf::internal::MergeFromFail(__FILE__, line); -} - -} // namespace - - -// =================================================================== - -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int DetectionBox::kClassFieldNumber; -const int DetectionBox::kScoreFieldNumber; -const int DetectionBox::kLeftTopXFieldNumber; -const int DetectionBox::kLeftTopYFieldNumber; -const int DetectionBox::kRightBottomXFieldNumber; -const int DetectionBox::kRightBottomYFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -DetectionBox::DetectionBox() - : ::google::protobuf::Message(), _internal_metadata_(NULL) { - if (this != internal_default_instance()) protobuf_InitDefaults_detection_5fresult_2eproto(); - SharedCtor(); - // @@protoc_insertion_point(constructor:PaddleSolution.DetectionBox) -} - -void DetectionBox::InitAsDefaultInstance() { -} - -DetectionBox::DetectionBox(const DetectionBox& from) - : ::google::protobuf::Message(), - _internal_metadata_(NULL) { - 
SharedCtor(); - UnsafeMergeFrom(from); - // @@protoc_insertion_point(copy_constructor:PaddleSolution.DetectionBox) -} - -void DetectionBox::SharedCtor() { - _cached_size_ = 0; - ::memset(&class__, 0, reinterpret_cast(&right_bottom_y_) - - reinterpret_cast(&class__) + sizeof(right_bottom_y_)); -} - -DetectionBox::~DetectionBox() { - // @@protoc_insertion_point(destructor:PaddleSolution.DetectionBox) - SharedDtor(); -} - -void DetectionBox::SharedDtor() { -} - -void DetectionBox::SetCachedSize(int size) const { - GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); - _cached_size_ = size; - GOOGLE_SAFE_CONCURRENT_WRITES_END(); -} -const ::google::protobuf::Descriptor* DetectionBox::descriptor() { - protobuf_AssignDescriptorsOnce(); - return DetectionBox_descriptor_; -} - -const DetectionBox& DetectionBox::default_instance() { - protobuf_InitDefaults_detection_5fresult_2eproto(); - return *internal_default_instance(); -} - -::google::protobuf::internal::ExplicitlyConstructed DetectionBox_default_instance_; - -DetectionBox* DetectionBox::New(::google::protobuf::Arena* arena) const { - DetectionBox* n = new DetectionBox; - if (arena != NULL) { - arena->Own(n); - } - return n; -} - -void DetectionBox::Clear() { -// @@protoc_insertion_point(message_clear_start:PaddleSolution.DetectionBox) -#if defined(__clang__) -#define ZR_HELPER_(f) \ - _Pragma("clang diagnostic push") \ - _Pragma("clang diagnostic ignored \"-Winvalid-offsetof\"") \ - __builtin_offsetof(DetectionBox, f) \ - _Pragma("clang diagnostic pop") -#else -#define ZR_HELPER_(f) reinterpret_cast(\ - &reinterpret_cast(16)->f) -#endif - -#define ZR_(first, last) do {\ - ::memset(&(first), 0,\ - ZR_HELPER_(last) - ZR_HELPER_(first) + sizeof(last));\ -} while (0) - - ZR_(class__, right_bottom_y_); - -#undef ZR_HELPER_ -#undef ZR_ - - _has_bits_.Clear(); - if (_internal_metadata_.have_unknown_fields()) { - mutable_unknown_fields()->Clear(); - } -} - -bool DetectionBox::MergePartialFromCodedStream( - 
::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:PaddleSolution.DetectionBox) - for (;;) { - ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoff(127); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional int32 class = 1; - case 1: { - if (tag == 8) { - set_has_class_(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &class__))); - } else { - goto handle_unusual; - } - if (input->ExpectTag(21)) goto parse_score; - break; - } - - // optional float score = 2; - case 2: { - if (tag == 21) { - parse_score: - set_has_score(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &score_))); - } else { - goto handle_unusual; - } - if (input->ExpectTag(29)) goto parse_left_top_x; - break; - } - - // optional float left_top_x = 3; - case 3: { - if (tag == 29) { - parse_left_top_x: - set_has_left_top_x(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &left_top_x_))); - } else { - goto handle_unusual; - } - if (input->ExpectTag(37)) goto parse_left_top_y; - break; - } - - // optional float left_top_y = 4; - case 4: { - if (tag == 37) { - parse_left_top_y: - set_has_left_top_y(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &left_top_y_))); - } else { - goto handle_unusual; - } - if (input->ExpectTag(45)) goto parse_right_bottom_x; - break; - } - - // optional float right_bottom_x = 5; - case 5: { - if 
(tag == 45) { - parse_right_bottom_x: - set_has_right_bottom_x(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &right_bottom_x_))); - } else { - goto handle_unusual; - } - if (input->ExpectTag(53)) goto parse_right_bottom_y; - break; - } - - // optional float right_bottom_y = 6; - case 6: { - if (tag == 53) { - parse_right_bottom_y: - set_has_right_bottom_y(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &right_bottom_y_))); - } else { - goto handle_unusual; - } - if (input->ExpectAtEnd()) goto success; - break; - } - - default: { - handle_unusual: - if (tag == 0 || - ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == - ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { - goto success; - } - DO_(::google::protobuf::internal::WireFormat::SkipField( - input, tag, mutable_unknown_fields())); - break; - } - } - } -success: - // @@protoc_insertion_point(parse_success:PaddleSolution.DetectionBox) - return true; -failure: - // @@protoc_insertion_point(parse_failure:PaddleSolution.DetectionBox) - return false; -#undef DO_ -} - -void DetectionBox::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:PaddleSolution.DetectionBox) - // optional int32 class = 1; - if (has_class_()) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(1, this->class_(), output); - } - - // optional float score = 2; - if (has_score()) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->score(), output); - } - - // optional float left_top_x = 3; - if (has_left_top_x()) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->left_top_x(), output); - } - - // optional float left_top_y = 4; - if (has_left_top_y()) { - 
::google::protobuf::internal::WireFormatLite::WriteFloat(4, this->left_top_y(), output); - } - - // optional float right_bottom_x = 5; - if (has_right_bottom_x()) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(5, this->right_bottom_x(), output); - } - - // optional float right_bottom_y = 6; - if (has_right_bottom_y()) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(6, this->right_bottom_y(), output); - } - - if (_internal_metadata_.have_unknown_fields()) { - ::google::protobuf::internal::WireFormat::SerializeUnknownFields( - unknown_fields(), output); - } - // @@protoc_insertion_point(serialize_end:PaddleSolution.DetectionBox) -} - -::google::protobuf::uint8* DetectionBox::InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8* target) const { - (void)deterministic; // Unused - // @@protoc_insertion_point(serialize_to_array_start:PaddleSolution.DetectionBox) - // optional int32 class = 1; - if (has_class_()) { - target = ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(1, this->class_(), target); - } - - // optional float score = 2; - if (has_score()) { - target = ::google::protobuf::internal::WireFormatLite::WriteFloatToArray(2, this->score(), target); - } - - // optional float left_top_x = 3; - if (has_left_top_x()) { - target = ::google::protobuf::internal::WireFormatLite::WriteFloatToArray(3, this->left_top_x(), target); - } - - // optional float left_top_y = 4; - if (has_left_top_y()) { - target = ::google::protobuf::internal::WireFormatLite::WriteFloatToArray(4, this->left_top_y(), target); - } - - // optional float right_bottom_x = 5; - if (has_right_bottom_x()) { - target = ::google::protobuf::internal::WireFormatLite::WriteFloatToArray(5, this->right_bottom_x(), target); - } - - // optional float right_bottom_y = 6; - if (has_right_bottom_y()) { - target = ::google::protobuf::internal::WireFormatLite::WriteFloatToArray(6, this->right_bottom_y(), target); - } - - if 
(_internal_metadata_.have_unknown_fields()) { - target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray( - unknown_fields(), target); - } - // @@protoc_insertion_point(serialize_to_array_end:PaddleSolution.DetectionBox) - return target; -} - -size_t DetectionBox::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:PaddleSolution.DetectionBox) - size_t total_size = 0; - - if (_has_bits_[0 / 32] & 63u) { - // optional int32 class = 1; - if (has_class_()) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->class_()); - } - - // optional float score = 2; - if (has_score()) { - total_size += 1 + 4; - } - - // optional float left_top_x = 3; - if (has_left_top_x()) { - total_size += 1 + 4; - } - - // optional float left_top_y = 4; - if (has_left_top_y()) { - total_size += 1 + 4; - } - - // optional float right_bottom_x = 5; - if (has_right_bottom_x()) { - total_size += 1 + 4; - } - - // optional float right_bottom_y = 6; - if (has_right_bottom_y()) { - total_size += 1 + 4; - } - - } - if (_internal_metadata_.have_unknown_fields()) { - total_size += - ::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize( - unknown_fields()); - } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); - GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); - _cached_size_ = cached_size; - GOOGLE_SAFE_CONCURRENT_WRITES_END(); - return total_size; -} - -void DetectionBox::MergeFrom(const ::google::protobuf::Message& from) { -// @@protoc_insertion_point(generalized_merge_from_start:PaddleSolution.DetectionBox) - if (GOOGLE_PREDICT_FALSE(&from == this)) MergeFromFail(__LINE__); - const DetectionBox* source = - ::google::protobuf::internal::DynamicCastToGenerated( - &from); - if (source == NULL) { - // @@protoc_insertion_point(generalized_merge_from_cast_fail:PaddleSolution.DetectionBox) - ::google::protobuf::internal::ReflectionOps::Merge(from, this); - } else { - // 
@@protoc_insertion_point(generalized_merge_from_cast_success:PaddleSolution.DetectionBox) - UnsafeMergeFrom(*source); - } -} - -void DetectionBox::MergeFrom(const DetectionBox& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:PaddleSolution.DetectionBox) - if (GOOGLE_PREDICT_TRUE(&from != this)) { - UnsafeMergeFrom(from); - } else { - MergeFromFail(__LINE__); - } -} - -void DetectionBox::UnsafeMergeFrom(const DetectionBox& from) { - GOOGLE_DCHECK(&from != this); - if (from._has_bits_[0 / 32] & (0xffu << (0 % 32))) { - if (from.has_class_()) { - set_class_(from.class_()); - } - if (from.has_score()) { - set_score(from.score()); - } - if (from.has_left_top_x()) { - set_left_top_x(from.left_top_x()); - } - if (from.has_left_top_y()) { - set_left_top_y(from.left_top_y()); - } - if (from.has_right_bottom_x()) { - set_right_bottom_x(from.right_bottom_x()); - } - if (from.has_right_bottom_y()) { - set_right_bottom_y(from.right_bottom_y()); - } - } - if (from._internal_metadata_.have_unknown_fields()) { - ::google::protobuf::UnknownFieldSet::MergeToInternalMetdata( - from.unknown_fields(), &_internal_metadata_); - } -} - -void DetectionBox::CopyFrom(const ::google::protobuf::Message& from) { -// @@protoc_insertion_point(generalized_copy_from_start:PaddleSolution.DetectionBox) - if (&from == this) return; - Clear(); - MergeFrom(from); -} - -void DetectionBox::CopyFrom(const DetectionBox& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:PaddleSolution.DetectionBox) - if (&from == this) return; - Clear(); - UnsafeMergeFrom(from); -} - -bool DetectionBox::IsInitialized() const { - - return true; -} - -void DetectionBox::Swap(DetectionBox* other) { - if (other == this) return; - InternalSwap(other); -} -void DetectionBox::InternalSwap(DetectionBox* other) { - std::swap(class__, other->class__); - std::swap(score_, other->score_); - std::swap(left_top_x_, other->left_top_x_); - std::swap(left_top_y_, other->left_top_y_); - 
std::swap(right_bottom_x_, other->right_bottom_x_); - std::swap(right_bottom_y_, other->right_bottom_y_); - std::swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); - std::swap(_cached_size_, other->_cached_size_); -} - -::google::protobuf::Metadata DetectionBox::GetMetadata() const { - protobuf_AssignDescriptorsOnce(); - ::google::protobuf::Metadata metadata; - metadata.descriptor = DetectionBox_descriptor_; - metadata.reflection = DetectionBox_reflection_; - return metadata; -} - -#if PROTOBUF_INLINE_NOT_IN_HEADERS -// DetectionBox - -// optional int32 class = 1; -bool DetectionBox::has_class_() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -void DetectionBox::set_has_class_() { - _has_bits_[0] |= 0x00000001u; -} -void DetectionBox::clear_has_class_() { - _has_bits_[0] &= ~0x00000001u; -} -void DetectionBox::clear_class_() { - class__ = 0; - clear_has_class_(); -} -::google::protobuf::int32 DetectionBox::class_() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.class) - return class__; -} -void DetectionBox::set_class_(::google::protobuf::int32 value) { - set_has_class_(); - class__ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.class) -} - -// optional float score = 2; -bool DetectionBox::has_score() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -void DetectionBox::set_has_score() { - _has_bits_[0] |= 0x00000002u; -} -void DetectionBox::clear_has_score() { - _has_bits_[0] &= ~0x00000002u; -} -void DetectionBox::clear_score() { - score_ = 0; - clear_has_score(); -} -float DetectionBox::score() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.score) - return score_; -} -void DetectionBox::set_score(float value) { - set_has_score(); - score_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.score) -} - -// optional float left_top_x = 3; -bool DetectionBox::has_left_top_x() const { - return 
(_has_bits_[0] & 0x00000004u) != 0; -} -void DetectionBox::set_has_left_top_x() { - _has_bits_[0] |= 0x00000004u; -} -void DetectionBox::clear_has_left_top_x() { - _has_bits_[0] &= ~0x00000004u; -} -void DetectionBox::clear_left_top_x() { - left_top_x_ = 0; - clear_has_left_top_x(); -} -float DetectionBox::left_top_x() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.left_top_x) - return left_top_x_; -} -void DetectionBox::set_left_top_x(float value) { - set_has_left_top_x(); - left_top_x_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.left_top_x) -} - -// optional float left_top_y = 4; -bool DetectionBox::has_left_top_y() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -void DetectionBox::set_has_left_top_y() { - _has_bits_[0] |= 0x00000008u; -} -void DetectionBox::clear_has_left_top_y() { - _has_bits_[0] &= ~0x00000008u; -} -void DetectionBox::clear_left_top_y() { - left_top_y_ = 0; - clear_has_left_top_y(); -} -float DetectionBox::left_top_y() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.left_top_y) - return left_top_y_; -} -void DetectionBox::set_left_top_y(float value) { - set_has_left_top_y(); - left_top_y_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.left_top_y) -} - -// optional float right_bottom_x = 5; -bool DetectionBox::has_right_bottom_x() const { - return (_has_bits_[0] & 0x00000010u) != 0; -} -void DetectionBox::set_has_right_bottom_x() { - _has_bits_[0] |= 0x00000010u; -} -void DetectionBox::clear_has_right_bottom_x() { - _has_bits_[0] &= ~0x00000010u; -} -void DetectionBox::clear_right_bottom_x() { - right_bottom_x_ = 0; - clear_has_right_bottom_x(); -} -float DetectionBox::right_bottom_x() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.right_bottom_x) - return right_bottom_x_; -} -void DetectionBox::set_right_bottom_x(float value) { - set_has_right_bottom_x(); - right_bottom_x_ = value; - // 
@@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.right_bottom_x) -} - -// optional float right_bottom_y = 6; -bool DetectionBox::has_right_bottom_y() const { - return (_has_bits_[0] & 0x00000020u) != 0; -} -void DetectionBox::set_has_right_bottom_y() { - _has_bits_[0] |= 0x00000020u; -} -void DetectionBox::clear_has_right_bottom_y() { - _has_bits_[0] &= ~0x00000020u; -} -void DetectionBox::clear_right_bottom_y() { - right_bottom_y_ = 0; - clear_has_right_bottom_y(); -} -float DetectionBox::right_bottom_y() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.right_bottom_y) - return right_bottom_y_; -} -void DetectionBox::set_right_bottom_y(float value) { - set_has_right_bottom_y(); - right_bottom_y_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.right_bottom_y) -} - -inline const DetectionBox* DetectionBox::internal_default_instance() { - return &DetectionBox_default_instance_.get(); -} -#endif // PROTOBUF_INLINE_NOT_IN_HEADERS - -// =================================================================== - -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int DetectionResult::kFilenameFieldNumber; -const int DetectionResult::kDetectionBoxesFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -DetectionResult::DetectionResult() - : ::google::protobuf::Message(), _internal_metadata_(NULL) { - if (this != internal_default_instance()) protobuf_InitDefaults_detection_5fresult_2eproto(); - SharedCtor(); - // @@protoc_insertion_point(constructor:PaddleSolution.DetectionResult) -} - -void DetectionResult::InitAsDefaultInstance() { -} - -DetectionResult::DetectionResult(const DetectionResult& from) - : ::google::protobuf::Message(), - _internal_metadata_(NULL) { - SharedCtor(); - UnsafeMergeFrom(from); - // @@protoc_insertion_point(copy_constructor:PaddleSolution.DetectionResult) -} - -void DetectionResult::SharedCtor() { - _cached_size_ = 0; - 
filename_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} - -DetectionResult::~DetectionResult() { - // @@protoc_insertion_point(destructor:PaddleSolution.DetectionResult) - SharedDtor(); -} - -void DetectionResult::SharedDtor() { - filename_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} - -void DetectionResult::SetCachedSize(int size) const { - GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); - _cached_size_ = size; - GOOGLE_SAFE_CONCURRENT_WRITES_END(); -} -const ::google::protobuf::Descriptor* DetectionResult::descriptor() { - protobuf_AssignDescriptorsOnce(); - return DetectionResult_descriptor_; -} - -const DetectionResult& DetectionResult::default_instance() { - protobuf_InitDefaults_detection_5fresult_2eproto(); - return *internal_default_instance(); -} - -::google::protobuf::internal::ExplicitlyConstructed DetectionResult_default_instance_; - -DetectionResult* DetectionResult::New(::google::protobuf::Arena* arena) const { - DetectionResult* n = new DetectionResult; - if (arena != NULL) { - arena->Own(n); - } - return n; -} - -void DetectionResult::Clear() { -// @@protoc_insertion_point(message_clear_start:PaddleSolution.DetectionResult) - if (has_filename()) { - filename_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - } - detection_boxes_.Clear(); - _has_bits_.Clear(); - if (_internal_metadata_.have_unknown_fields()) { - mutable_unknown_fields()->Clear(); - } -} - -bool DetectionResult::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:PaddleSolution.DetectionResult) - for (;;) { - ::std::pair< ::google::protobuf::uint32, bool> p = input->ReadTagWithCutoff(127); - tag = p.first; - if (!p.second) goto handle_unusual; - switch 
(::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional string filename = 1; - case 1: { - if (tag == 10) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_filename())); - ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField( - this->filename().data(), this->filename().length(), - ::google::protobuf::internal::WireFormat::PARSE, - "PaddleSolution.DetectionResult.filename"); - } else { - goto handle_unusual; - } - if (input->ExpectTag(18)) goto parse_detection_boxes; - break; - } - - // repeated .PaddleSolution.DetectionBox detection_boxes = 2; - case 2: { - if (tag == 18) { - parse_detection_boxes: - DO_(input->IncrementRecursionDepth()); - parse_loop_detection_boxes: - DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtualNoRecursionDepth( - input, add_detection_boxes())); - } else { - goto handle_unusual; - } - if (input->ExpectTag(18)) goto parse_loop_detection_boxes; - input->UnsafeDecrementRecursionDepth(); - if (input->ExpectAtEnd()) goto success; - break; - } - - default: { - handle_unusual: - if (tag == 0 || - ::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == - ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { - goto success; - } - DO_(::google::protobuf::internal::WireFormat::SkipField( - input, tag, mutable_unknown_fields())); - break; - } - } - } -success: - // @@protoc_insertion_point(parse_success:PaddleSolution.DetectionResult) - return true; -failure: - // @@protoc_insertion_point(parse_failure:PaddleSolution.DetectionResult) - return false; -#undef DO_ -} - -void DetectionResult::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:PaddleSolution.DetectionResult) - // optional string filename = 1; - if (has_filename()) { - ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField( - this->filename().data(), 
this->filename().length(), - ::google::protobuf::internal::WireFormat::SERIALIZE, - "PaddleSolution.DetectionResult.filename"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->filename(), output); - } - - // repeated .PaddleSolution.DetectionBox detection_boxes = 2; - for (unsigned int i = 0, n = this->detection_boxes_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteMessageMaybeToArray( - 2, this->detection_boxes(i), output); - } - - if (_internal_metadata_.have_unknown_fields()) { - ::google::protobuf::internal::WireFormat::SerializeUnknownFields( - unknown_fields(), output); - } - // @@protoc_insertion_point(serialize_end:PaddleSolution.DetectionResult) -} - -::google::protobuf::uint8* DetectionResult::InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8* target) const { - (void)deterministic; // Unused - // @@protoc_insertion_point(serialize_to_array_start:PaddleSolution.DetectionResult) - // optional string filename = 1; - if (has_filename()) { - ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField( - this->filename().data(), this->filename().length(), - ::google::protobuf::internal::WireFormat::SERIALIZE, - "PaddleSolution.DetectionResult.filename"); - target = - ::google::protobuf::internal::WireFormatLite::WriteStringToArray( - 1, this->filename(), target); - } - - // repeated .PaddleSolution.DetectionBox detection_boxes = 2; - for (unsigned int i = 0, n = this->detection_boxes_size(); i < n; i++) { - target = ::google::protobuf::internal::WireFormatLite:: - InternalWriteMessageNoVirtualToArray( - 2, this->detection_boxes(i), false, target); - } - - if (_internal_metadata_.have_unknown_fields()) { - target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray( - unknown_fields(), target); - } - // @@protoc_insertion_point(serialize_to_array_end:PaddleSolution.DetectionResult) - return target; -} - -size_t 
DetectionResult::ByteSizeLong() const { -// @@protoc_insertion_point(message_byte_size_start:PaddleSolution.DetectionResult) - size_t total_size = 0; - - // optional string filename = 1; - if (has_filename()) { - total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->filename()); - } - - // repeated .PaddleSolution.DetectionBox detection_boxes = 2; - { - unsigned int count = this->detection_boxes_size(); - total_size += 1UL * count; - for (unsigned int i = 0; i < count; i++) { - total_size += - ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( - this->detection_boxes(i)); - } - } - - if (_internal_metadata_.have_unknown_fields()) { - total_size += - ::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize( - unknown_fields()); - } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); - GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); - _cached_size_ = cached_size; - GOOGLE_SAFE_CONCURRENT_WRITES_END(); - return total_size; -} - -void DetectionResult::MergeFrom(const ::google::protobuf::Message& from) { -// @@protoc_insertion_point(generalized_merge_from_start:PaddleSolution.DetectionResult) - if (GOOGLE_PREDICT_FALSE(&from == this)) MergeFromFail(__LINE__); - const DetectionResult* source = - ::google::protobuf::internal::DynamicCastToGenerated( - &from); - if (source == NULL) { - // @@protoc_insertion_point(generalized_merge_from_cast_fail:PaddleSolution.DetectionResult) - ::google::protobuf::internal::ReflectionOps::Merge(from, this); - } else { - // @@protoc_insertion_point(generalized_merge_from_cast_success:PaddleSolution.DetectionResult) - UnsafeMergeFrom(*source); - } -} - -void DetectionResult::MergeFrom(const DetectionResult& from) { -// @@protoc_insertion_point(class_specific_merge_from_start:PaddleSolution.DetectionResult) - if (GOOGLE_PREDICT_TRUE(&from != this)) { - UnsafeMergeFrom(from); - } else { - MergeFromFail(__LINE__); - } -} - -void DetectionResult::UnsafeMergeFrom(const 
DetectionResult& from) { - GOOGLE_DCHECK(&from != this); - detection_boxes_.MergeFrom(from.detection_boxes_); - if (from._has_bits_[0 / 32] & (0xffu << (0 % 32))) { - if (from.has_filename()) { - set_has_filename(); - filename_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.filename_); - } - } - if (from._internal_metadata_.have_unknown_fields()) { - ::google::protobuf::UnknownFieldSet::MergeToInternalMetdata( - from.unknown_fields(), &_internal_metadata_); - } -} - -void DetectionResult::CopyFrom(const ::google::protobuf::Message& from) { -// @@protoc_insertion_point(generalized_copy_from_start:PaddleSolution.DetectionResult) - if (&from == this) return; - Clear(); - MergeFrom(from); -} - -void DetectionResult::CopyFrom(const DetectionResult& from) { -// @@protoc_insertion_point(class_specific_copy_from_start:PaddleSolution.DetectionResult) - if (&from == this) return; - Clear(); - UnsafeMergeFrom(from); -} - -bool DetectionResult::IsInitialized() const { - - return true; -} - -void DetectionResult::Swap(DetectionResult* other) { - if (other == this) return; - InternalSwap(other); -} -void DetectionResult::InternalSwap(DetectionResult* other) { - filename_.Swap(&other->filename_); - detection_boxes_.UnsafeArenaSwap(&other->detection_boxes_); - std::swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); - std::swap(_cached_size_, other->_cached_size_); -} - -::google::protobuf::Metadata DetectionResult::GetMetadata() const { - protobuf_AssignDescriptorsOnce(); - ::google::protobuf::Metadata metadata; - metadata.descriptor = DetectionResult_descriptor_; - metadata.reflection = DetectionResult_reflection_; - return metadata; -} - -#if PROTOBUF_INLINE_NOT_IN_HEADERS -// DetectionResult - -// optional string filename = 1; -bool DetectionResult::has_filename() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -void DetectionResult::set_has_filename() { - _has_bits_[0] |= 
0x00000001u; -} -void DetectionResult::clear_has_filename() { - _has_bits_[0] &= ~0x00000001u; -} -void DetectionResult::clear_filename() { - filename_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_filename(); -} -const ::std::string& DetectionResult::filename() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionResult.filename) - return filename_.GetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -void DetectionResult::set_filename(const ::std::string& value) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionResult.filename) -} -void DetectionResult::set_filename(const char* value) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); - // @@protoc_insertion_point(field_set_char:PaddleSolution.DetectionResult.filename) -} -void DetectionResult::set_filename(const char* value, size_t size) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:PaddleSolution.DetectionResult.filename) -} -::std::string* DetectionResult::mutable_filename() { - set_has_filename(); - // @@protoc_insertion_point(field_mutable:PaddleSolution.DetectionResult.filename) - return filename_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -::std::string* DetectionResult::release_filename() { - // @@protoc_insertion_point(field_release:PaddleSolution.DetectionResult.filename) - clear_has_filename(); - return filename_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -void DetectionResult::set_allocated_filename(::std::string* filename) { - if (filename != NULL) { - 
set_has_filename(); - } else { - clear_has_filename(); - } - filename_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), filename); - // @@protoc_insertion_point(field_set_allocated:PaddleSolution.DetectionResult.filename) -} - -// repeated .PaddleSolution.DetectionBox detection_boxes = 2; -int DetectionResult::detection_boxes_size() const { - return detection_boxes_.size(); -} -void DetectionResult::clear_detection_boxes() { - detection_boxes_.Clear(); -} -const ::PaddleSolution::DetectionBox& DetectionResult::detection_boxes(int index) const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Get(index); -} -::PaddleSolution::DetectionBox* DetectionResult::mutable_detection_boxes(int index) { - // @@protoc_insertion_point(field_mutable:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Mutable(index); -} -::PaddleSolution::DetectionBox* DetectionResult::add_detection_boxes() { - // @@protoc_insertion_point(field_add:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Add(); -} -::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >* -DetectionResult::mutable_detection_boxes() { - // @@protoc_insertion_point(field_mutable_list:PaddleSolution.DetectionResult.detection_boxes) - return &detection_boxes_; -} -const ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >& -DetectionResult::detection_boxes() const { - // @@protoc_insertion_point(field_list:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_; -} - -inline const DetectionResult* DetectionResult::internal_default_instance() { - return &DetectionResult_default_instance_.get(); -} -#endif // PROTOBUF_INLINE_NOT_IN_HEADERS - -// @@protoc_insertion_point(namespace_scope) - -} // namespace PaddleSolution - -// @@protoc_insertion_point(global_scope) diff --git 
a/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.h b/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.h deleted file mode 100644 index 1b2f89ea9ca13f3f949bd19b097bb514a4afc525..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/utils/detection_result.pb.h +++ /dev/null @@ -1,563 +0,0 @@ -// Generated by the protocol buffer compiler. DO NOT EDIT! -// source: detection_result.proto - -#ifndef PROTOBUF_detection_5fresult_2eproto__INCLUDED -#define PROTOBUF_detection_5fresult_2eproto__INCLUDED - -#include - -#include - -#if GOOGLE_PROTOBUF_VERSION < 3001000 -#error This file was generated by a newer version of protoc which is -#error incompatible with your Protocol Buffer headers. Please update -#error your headers. -#endif -#if 3001000 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION -#error This file was generated by an older version of protoc which is -#error incompatible with your Protocol Buffer headers. Please -#error regenerate this file with a newer version of protoc. -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -// @@protoc_insertion_point(includes) - -namespace PaddleSolution { - -// Internal implementation detail -- do not call these. 
-void protobuf_AddDesc_detection_5fresult_2eproto(); -void protobuf_InitDefaults_detection_5fresult_2eproto(); -void protobuf_AssignDesc_detection_5fresult_2eproto(); -void protobuf_ShutdownFile_detection_5fresult_2eproto(); - -class DetectionBox; -class DetectionResult; - -// =================================================================== - -class DetectionBox : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:PaddleSolution.DetectionBox) */ { - public: - DetectionBox(); - virtual ~DetectionBox(); - - DetectionBox(const DetectionBox& from); - - inline DetectionBox& operator=(const DetectionBox& from) { - CopyFrom(from); - return *this; - } - - inline const ::google::protobuf::UnknownFieldSet& unknown_fields() const { - return _internal_metadata_.unknown_fields(); - } - - inline ::google::protobuf::UnknownFieldSet* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); - } - - static const ::google::protobuf::Descriptor* descriptor(); - static const DetectionBox& default_instance(); - - static const DetectionBox* internal_default_instance(); - - void Swap(DetectionBox* other); - - // implements Message ---------------------------------------------- - - inline DetectionBox* New() const { return New(NULL); } - - DetectionBox* New(::google::protobuf::Arena* arena) const; - void CopyFrom(const ::google::protobuf::Message& from); - void MergeFrom(const ::google::protobuf::Message& from); - void CopyFrom(const DetectionBox& from); - void MergeFrom(const DetectionBox& from); - void Clear(); - bool IsInitialized() const; - - size_t ByteSizeLong() const; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input); - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const; - ::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8* output) const; - ::google::protobuf::uint8* 
SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const { - return InternalSerializeWithCachedSizesToArray(false, output); - } - int GetCachedSize() const { return _cached_size_; } - private: - void SharedCtor(); - void SharedDtor(); - void SetCachedSize(int size) const; - void InternalSwap(DetectionBox* other); - void UnsafeMergeFrom(const DetectionBox& from); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return _internal_metadata_.arena(); - } - inline void* MaybeArenaPtr() const { - return _internal_metadata_.raw_arena_ptr(); - } - public: - - ::google::protobuf::Metadata GetMetadata() const; - - // nested types ---------------------------------------------------- - - // accessors ------------------------------------------------------- - - // optional int32 class = 1; - bool has_class_() const; - void clear_class_(); - static const int kClassFieldNumber = 1; - ::google::protobuf::int32 class_() const; - void set_class_(::google::protobuf::int32 value); - - // optional float score = 2; - bool has_score() const; - void clear_score(); - static const int kScoreFieldNumber = 2; - float score() const; - void set_score(float value); - - // optional float left_top_x = 3; - bool has_left_top_x() const; - void clear_left_top_x(); - static const int kLeftTopXFieldNumber = 3; - float left_top_x() const; - void set_left_top_x(float value); - - // optional float left_top_y = 4; - bool has_left_top_y() const; - void clear_left_top_y(); - static const int kLeftTopYFieldNumber = 4; - float left_top_y() const; - void set_left_top_y(float value); - - // optional float right_bottom_x = 5; - bool has_right_bottom_x() const; - void clear_right_bottom_x(); - static const int kRightBottomXFieldNumber = 5; - float right_bottom_x() const; - void set_right_bottom_x(float value); - - // optional float right_bottom_y = 6; - bool has_right_bottom_y() const; - void clear_right_bottom_y(); - static const int kRightBottomYFieldNumber = 6; - float 
right_bottom_y() const; - void set_right_bottom_y(float value); - - // @@protoc_insertion_point(class_scope:PaddleSolution.DetectionBox) - private: - inline void set_has_class_(); - inline void clear_has_class_(); - inline void set_has_score(); - inline void clear_has_score(); - inline void set_has_left_top_x(); - inline void clear_has_left_top_x(); - inline void set_has_left_top_y(); - inline void clear_has_left_top_y(); - inline void set_has_right_bottom_x(); - inline void clear_has_right_bottom_x(); - inline void set_has_right_bottom_y(); - inline void clear_has_right_bottom_y(); - - ::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable int _cached_size_; - ::google::protobuf::int32 class__; - float score_; - float left_top_x_; - float left_top_y_; - float right_bottom_x_; - float right_bottom_y_; - friend void protobuf_InitDefaults_detection_5fresult_2eproto_impl(); - friend void protobuf_AddDesc_detection_5fresult_2eproto_impl(); - friend void protobuf_AssignDesc_detection_5fresult_2eproto(); - friend void protobuf_ShutdownFile_detection_5fresult_2eproto(); - - void InitAsDefaultInstance(); -}; -extern ::google::protobuf::internal::ExplicitlyConstructed DetectionBox_default_instance_; - -// ------------------------------------------------------------------- - -class DetectionResult : public ::google::protobuf::Message /* @@protoc_insertion_point(class_definition:PaddleSolution.DetectionResult) */ { - public: - DetectionResult(); - virtual ~DetectionResult(); - - DetectionResult(const DetectionResult& from); - - inline DetectionResult& operator=(const DetectionResult& from) { - CopyFrom(from); - return *this; - } - - inline const ::google::protobuf::UnknownFieldSet& unknown_fields() const { - return _internal_metadata_.unknown_fields(); - } - - inline ::google::protobuf::UnknownFieldSet* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); - } - 
- static const ::google::protobuf::Descriptor* descriptor(); - static const DetectionResult& default_instance(); - - static const DetectionResult* internal_default_instance(); - - void Swap(DetectionResult* other); - - // implements Message ---------------------------------------------- - - inline DetectionResult* New() const { return New(NULL); } - - DetectionResult* New(::google::protobuf::Arena* arena) const; - void CopyFrom(const ::google::protobuf::Message& from); - void MergeFrom(const ::google::protobuf::Message& from); - void CopyFrom(const DetectionResult& from); - void MergeFrom(const DetectionResult& from); - void Clear(); - bool IsInitialized() const; - - size_t ByteSizeLong() const; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input); - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const; - ::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8* output) const; - ::google::protobuf::uint8* SerializeWithCachedSizesToArray(::google::protobuf::uint8* output) const { - return InternalSerializeWithCachedSizesToArray(false, output); - } - int GetCachedSize() const { return _cached_size_; } - private: - void SharedCtor(); - void SharedDtor(); - void SetCachedSize(int size) const; - void InternalSwap(DetectionResult* other); - void UnsafeMergeFrom(const DetectionResult& from); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return _internal_metadata_.arena(); - } - inline void* MaybeArenaPtr() const { - return _internal_metadata_.raw_arena_ptr(); - } - public: - - ::google::protobuf::Metadata GetMetadata() const; - - // nested types ---------------------------------------------------- - - // accessors ------------------------------------------------------- - - // optional string filename = 1; - bool has_filename() const; - void clear_filename(); - static const int kFilenameFieldNumber = 1; - const 
::std::string& filename() const; - void set_filename(const ::std::string& value); - void set_filename(const char* value); - void set_filename(const char* value, size_t size); - ::std::string* mutable_filename(); - ::std::string* release_filename(); - void set_allocated_filename(::std::string* filename); - - // repeated .PaddleSolution.DetectionBox detection_boxes = 2; - int detection_boxes_size() const; - void clear_detection_boxes(); - static const int kDetectionBoxesFieldNumber = 2; - const ::PaddleSolution::DetectionBox& detection_boxes(int index) const; - ::PaddleSolution::DetectionBox* mutable_detection_boxes(int index); - ::PaddleSolution::DetectionBox* add_detection_boxes(); - ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >* - mutable_detection_boxes(); - const ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >& - detection_boxes() const; - - // @@protoc_insertion_point(class_scope:PaddleSolution.DetectionResult) - private: - inline void set_has_filename(); - inline void clear_has_filename(); - - ::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable int _cached_size_; - ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox > detection_boxes_; - ::google::protobuf::internal::ArenaStringPtr filename_; - friend void protobuf_InitDefaults_detection_5fresult_2eproto_impl(); - friend void protobuf_AddDesc_detection_5fresult_2eproto_impl(); - friend void protobuf_AssignDesc_detection_5fresult_2eproto(); - friend void protobuf_ShutdownFile_detection_5fresult_2eproto(); - - void InitAsDefaultInstance(); -}; -extern ::google::protobuf::internal::ExplicitlyConstructed DetectionResult_default_instance_; - -// =================================================================== - - -// =================================================================== - -#if !PROTOBUF_INLINE_NOT_IN_HEADERS -// DetectionBox - -// optional int32 
class = 1; -inline bool DetectionBox::has_class_() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -inline void DetectionBox::set_has_class_() { - _has_bits_[0] |= 0x00000001u; -} -inline void DetectionBox::clear_has_class_() { - _has_bits_[0] &= ~0x00000001u; -} -inline void DetectionBox::clear_class_() { - class__ = 0; - clear_has_class_(); -} -inline ::google::protobuf::int32 DetectionBox::class_() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.class) - return class__; -} -inline void DetectionBox::set_class_(::google::protobuf::int32 value) { - set_has_class_(); - class__ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.class) -} - -// optional float score = 2; -inline bool DetectionBox::has_score() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void DetectionBox::set_has_score() { - _has_bits_[0] |= 0x00000002u; -} -inline void DetectionBox::clear_has_score() { - _has_bits_[0] &= ~0x00000002u; -} -inline void DetectionBox::clear_score() { - score_ = 0; - clear_has_score(); -} -inline float DetectionBox::score() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.score) - return score_; -} -inline void DetectionBox::set_score(float value) { - set_has_score(); - score_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.score) -} - -// optional float left_top_x = 3; -inline bool DetectionBox::has_left_top_x() const { - return (_has_bits_[0] & 0x00000004u) != 0; -} -inline void DetectionBox::set_has_left_top_x() { - _has_bits_[0] |= 0x00000004u; -} -inline void DetectionBox::clear_has_left_top_x() { - _has_bits_[0] &= ~0x00000004u; -} -inline void DetectionBox::clear_left_top_x() { - left_top_x_ = 0; - clear_has_left_top_x(); -} -inline float DetectionBox::left_top_x() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.left_top_x) - return left_top_x_; -} -inline void DetectionBox::set_left_top_x(float value) 
{ - set_has_left_top_x(); - left_top_x_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.left_top_x) -} - -// optional float left_top_y = 4; -inline bool DetectionBox::has_left_top_y() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -inline void DetectionBox::set_has_left_top_y() { - _has_bits_[0] |= 0x00000008u; -} -inline void DetectionBox::clear_has_left_top_y() { - _has_bits_[0] &= ~0x00000008u; -} -inline void DetectionBox::clear_left_top_y() { - left_top_y_ = 0; - clear_has_left_top_y(); -} -inline float DetectionBox::left_top_y() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.left_top_y) - return left_top_y_; -} -inline void DetectionBox::set_left_top_y(float value) { - set_has_left_top_y(); - left_top_y_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.left_top_y) -} - -// optional float right_bottom_x = 5; -inline bool DetectionBox::has_right_bottom_x() const { - return (_has_bits_[0] & 0x00000010u) != 0; -} -inline void DetectionBox::set_has_right_bottom_x() { - _has_bits_[0] |= 0x00000010u; -} -inline void DetectionBox::clear_has_right_bottom_x() { - _has_bits_[0] &= ~0x00000010u; -} -inline void DetectionBox::clear_right_bottom_x() { - right_bottom_x_ = 0; - clear_has_right_bottom_x(); -} -inline float DetectionBox::right_bottom_x() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.right_bottom_x) - return right_bottom_x_; -} -inline void DetectionBox::set_right_bottom_x(float value) { - set_has_right_bottom_x(); - right_bottom_x_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.right_bottom_x) -} - -// optional float right_bottom_y = 6; -inline bool DetectionBox::has_right_bottom_y() const { - return (_has_bits_[0] & 0x00000020u) != 0; -} -inline void DetectionBox::set_has_right_bottom_y() { - _has_bits_[0] |= 0x00000020u; -} -inline void DetectionBox::clear_has_right_bottom_y() { - _has_bits_[0] &= 
~0x00000020u; -} -inline void DetectionBox::clear_right_bottom_y() { - right_bottom_y_ = 0; - clear_has_right_bottom_y(); -} -inline float DetectionBox::right_bottom_y() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionBox.right_bottom_y) - return right_bottom_y_; -} -inline void DetectionBox::set_right_bottom_y(float value) { - set_has_right_bottom_y(); - right_bottom_y_ = value; - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionBox.right_bottom_y) -} - -inline const DetectionBox* DetectionBox::internal_default_instance() { - return &DetectionBox_default_instance_.get(); -} -// ------------------------------------------------------------------- - -// DetectionResult - -// optional string filename = 1; -inline bool DetectionResult::has_filename() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -inline void DetectionResult::set_has_filename() { - _has_bits_[0] |= 0x00000001u; -} -inline void DetectionResult::clear_has_filename() { - _has_bits_[0] &= ~0x00000001u; -} -inline void DetectionResult::clear_filename() { - filename_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_filename(); -} -inline const ::std::string& DetectionResult::filename() const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionResult.filename) - return filename_.GetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void DetectionResult::set_filename(const ::std::string& value) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:PaddleSolution.DetectionResult.filename) -} -inline void DetectionResult::set_filename(const char* value) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); - // @@protoc_insertion_point(field_set_char:PaddleSolution.DetectionResult.filename) -} 
-inline void DetectionResult::set_filename(const char* value, size_t size) { - set_has_filename(); - filename_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:PaddleSolution.DetectionResult.filename) -} -inline ::std::string* DetectionResult::mutable_filename() { - set_has_filename(); - // @@protoc_insertion_point(field_mutable:PaddleSolution.DetectionResult.filename) - return filename_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string* DetectionResult::release_filename() { - // @@protoc_insertion_point(field_release:PaddleSolution.DetectionResult.filename) - clear_has_filename(); - return filename_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void DetectionResult::set_allocated_filename(::std::string* filename) { - if (filename != NULL) { - set_has_filename(); - } else { - clear_has_filename(); - } - filename_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), filename); - // @@protoc_insertion_point(field_set_allocated:PaddleSolution.DetectionResult.filename) -} - -// repeated .PaddleSolution.DetectionBox detection_boxes = 2; -inline int DetectionResult::detection_boxes_size() const { - return detection_boxes_.size(); -} -inline void DetectionResult::clear_detection_boxes() { - detection_boxes_.Clear(); -} -inline const ::PaddleSolution::DetectionBox& DetectionResult::detection_boxes(int index) const { - // @@protoc_insertion_point(field_get:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Get(index); -} -inline ::PaddleSolution::DetectionBox* DetectionResult::mutable_detection_boxes(int index) { - // @@protoc_insertion_point(field_mutable:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Mutable(index); -} -inline ::PaddleSolution::DetectionBox* 
DetectionResult::add_detection_boxes() { - // @@protoc_insertion_point(field_add:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_.Add(); -} -inline ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >* -DetectionResult::mutable_detection_boxes() { - // @@protoc_insertion_point(field_mutable_list:PaddleSolution.DetectionResult.detection_boxes) - return &detection_boxes_; -} -inline const ::google::protobuf::RepeatedPtrField< ::PaddleSolution::DetectionBox >& -DetectionResult::detection_boxes() const { - // @@protoc_insertion_point(field_list:PaddleSolution.DetectionResult.detection_boxes) - return detection_boxes_; -} - -inline const DetectionResult* DetectionResult::internal_default_instance() { - return &DetectionResult_default_instance_.get(); -} -#endif // !PROTOBUF_INLINE_NOT_IN_HEADERS -// ------------------------------------------------------------------- - - -// @@protoc_insertion_point(namespace_scope) - -} // namespace PaddleSolution - -// @@protoc_insertion_point(global_scope) - -#endif // PROTOBUF_detection_5fresult_2eproto__INCLUDED diff --git a/PaddleCV/PaddleDetection/inference/utils/detection_result.proto b/PaddleCV/PaddleDetection/inference/utils/detection_result.proto deleted file mode 100644 index 2d1cbb2464ac09b0dcea01f8331da5ee7894a4d5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/utils/detection_result.proto +++ /dev/null @@ -1,21 +0,0 @@ -syntax = "proto2"; -package PaddleSolution; - -message DetectionBox { - optional int32 class = 1; - optional float score = 2; - optional float left_top_x = 3; - optional float left_top_y = 4; - optional float right_bottom_x = 5; - optional float right_bottom_y = 6; -} - -message DetectionResult { - optional string filename = 1; - repeated DetectionBox detection_boxes = 2; -} - -//message DetectionResultsContainer { -// repeated DetectionResult result = 1; -//} - diff --git a/PaddleCV/PaddleDetection/inference/utils/utils.h 
b/PaddleCV/PaddleDetection/inference/utils/utils.h deleted file mode 100644 index 63245219edb6ad39e896f1eb041e8bff69613382..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/inference/utils/utils.h +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#ifdef _WIN32 -#include -#else -#include -#include -#endif - -namespace PaddleSolution { - namespace utils { - enum SCALE_TYPE{ - UNPADDING, - RANGE_SCALING - }; - inline std::string path_join(const std::string& dir, const std::string& path) { - std::string seperator = "/"; - #ifdef _WIN32 - seperator = "\\"; - #endif - return dir + seperator + path; - } - #ifndef _WIN32 - // scan a directory and get all files with input extensions - inline std::vector get_directory_images(const std::string& path, const std::string& exts) - { - std::vector imgs; - struct dirent *entry; - DIR *dir = opendir(path.c_str()); - if (dir == NULL) { - closedir(dir); - return imgs; - } - - while ((entry = readdir(dir)) != NULL) { - std::string item = entry->d_name; - auto ext = strrchr(entry->d_name, '.'); - if (!ext || std::string(ext) == "." 
|| std::string(ext) == "..") { - continue; - } - if (exts.find(ext) != std::string::npos) { - imgs.push_back(path_join(path, entry->d_name)); - } - } - sort(imgs.begin(), imgs.end()); - return imgs; - } - #else - // scan a directory and get all files with input extensions - inline std::vector get_directory_images(const std::string& path, const std::string& exts) - { - std::vector imgs; - for (const auto& item : std::experimental::filesystem::directory_iterator(path)) { - auto suffix = item.path().extension().string(); - if (exts.find(suffix) != std::string::npos && suffix.size() > 0) { - auto fullname = path_join(path, item.path().filename().string()); - imgs.push_back(item.path().string()); - } - } - sort(imgs.begin(), imgs.end()); - return imgs; - } - #endif - - inline int scaling(int resize_type, int &w, int &h, int new_w, int new_h, int target_size, int max_size, float &im_scale_ratio) - { - if(w <= 0 || h <= 0 || new_w <= 0 || new_h <= 0){ - return -1; - } - switch(resize_type) { - case SCALE_TYPE::UNPADDING: - { - w = new_w; - h = new_h; - im_scale_ratio=0; - } - break; - case SCALE_TYPE::RANGE_SCALING: - { - int im_max_size = std::max(w, h); - int im_min_size = std::min(w, h); - float scale_ratio= static_cast(target_size) / static_cast(im_min_size); - if(max_size > 0) { - if(round(scale_ratio * im_max_size) > max_size) { - scale_ratio = static_cast(max_size) / static_cast(im_max_size); - } - } - w = round(scale_ratio * static_cast(w)); - h = round(scale_ratio * static_cast(h)); - im_scale_ratio = scale_ratio; - } - break; - default : - { - std::cout << "Can't support this type of scaling strategy." 
<< std::endl; - std::cout << "Throw exception at file " << __FILE__ << " on line " << __LINE__ << std::endl; - throw 0; - } - break; - } - return 0; - } - } -} diff --git a/PaddleCV/PaddleDetection/ppdet/__init__.py b/PaddleCV/PaddleDetection/ppdet/__init__.py deleted file mode 100644 index d0c32e26092f6ea25771279418582a24ea449ab2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/PaddleCV/PaddleDetection/ppdet/core/__init__.py b/PaddleCV/PaddleDetection/ppdet/core/__init__.py deleted file mode 100644 index f8561f944e4ca7453456c476092ba4e2d7a0bb5d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/core/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import ppdet.modeling -import ppdet.optimizer -import ppdet.data diff --git a/PaddleCV/PaddleDetection/ppdet/core/config/__init__.py b/PaddleCV/PaddleDetection/ppdet/core/config/__init__.py deleted file mode 100644 index d0c32e26092f6ea25771279418582a24ea449ab2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/core/config/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/PaddleCV/PaddleDetection/ppdet/core/config/schema.py b/PaddleCV/PaddleDetection/ppdet/core/config/schema.py deleted file mode 100644 index efba5be1dfd93243ff7a071f0f30accfa513e18f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/core/config/schema.py +++ /dev/null @@ -1,258 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import inspect -import importlib -import re - -try: - from docstring_parser import parse as doc_parse -except Exception: - - def doc_parse(*args): - if not doc_parse.__warning_sent__: - from ppdet.utils.cli import ColorTTY - color_tty = ColorTTY() - message = "docstring_parser is not installed, " \ - + "argument description is not available" - print(color_tty.yellow(message)) - doc_parse.__warning_sent__ = True - - doc_parse.__warning_sent__ = False - -try: - from typeguard import check_type -except Exception: - - def check_type(*args): - if not check_type.__warning_sent__: - from ppdet.utils.cli import ColorTTY - color_tty = ColorTTY() - message = "typeguard is not installed," \ - + "type checking is not available" - print(color_tty.yellow(message)) - check_type.__warning_sent__ = True - - check_type.__warning_sent__ = False - -__all__ = ['SchemaValue', 'SchemaDict', 'SharedConfig', 'extract_schema'] - - -class SchemaValue(object): - def __init__(self, name, doc='', type=None): - super(SchemaValue, self).__init__() - self.name = name - self.doc = doc - self.type = type - - def set_default(self, value): - self.default = value - - def has_default(self): - return hasattr(self, 'default') - - -class SchemaDict(dict): - def __init__(self, **kwargs): - super(SchemaDict, self).__init__() - self.schema = {} - self.strict = False - self.doc = "" - self.update(kwargs) - - def __setitem__(self, key, value): - # XXX also update regular dict to SchemaDict?? 
- if isinstance(value, dict) and key in self and isinstance(self[key], - SchemaDict): - self[key].update(value) - else: - super(SchemaDict, self).__setitem__(key, value) - - def __missing__(self, key): - if self.has_default(key): - return self.schema[key].default - elif key in self.schema: - return self.schema[key] - else: - raise KeyError(key) - - def copy(self): - newone = SchemaDict() - newone.__dict__.update(self.__dict__) - newone.update(self) - return newone - - def set_schema(self, key, value): - assert isinstance(value, SchemaValue) - self.schema[key] = value - - def set_strict(self, strict): - self.strict = strict - - def has_default(self, key): - return key in self.schema and self.schema[key].has_default() - - def is_default(self, key): - if not self.has_default(key): - return False - if hasattr(self[key], '__dict__'): - return True - else: - return key not in self or self[key] == self.schema[key].default - - def find_default_keys(self): - return [ - k for k in list(self.keys()) + list(self.schema.keys()) - if self.is_default(k) - ] - - def mandatory(self): - return any([k for k in self.schema.keys() if not self.has_default(k)]) - - def find_missing_keys(self): - missing = [ - k for k in self.schema.keys() - if k not in self and not self.has_default(k) - ] - placeholders = [k for k in self if self[k] in ('', '')] - return missing + placeholders - - def find_extra_keys(self): - return list(set(self.keys()) - set(self.schema.keys())) - - def find_mismatch_keys(self): - mismatch_keys = [] - for arg in self.schema.values(): - if arg.type is not None: - try: - check_type("{}.{}".format(self.name, arg.name), - self[arg.name], arg.type) - except Exception: - mismatch_keys.append(arg.name) - return mismatch_keys - - def validate(self): - missing_keys = self.find_missing_keys() - if missing_keys: - raise ValueError("Missing param for class<{}>: {}".format( - self.name, ", ".join(missing_keys))) - extra_keys = self.find_extra_keys() - if extra_keys and self.strict: 
- raise ValueError("Extraneous param for class<{}>: {}".format( - self.name, ", ".join(extra_keys))) - mismatch_keys = self.find_mismatch_keys() - if mismatch_keys: - raise TypeError("Wrong param type for class<{}>: {}".format( - self.name, ", ".join(mismatch_keys))) - - -class SharedConfig(object): - """ - Representation class for `__shared__` annotations, which work as follows: - - - if `key` is set for the module in config file, its value will take - precedence - - if `key` is not set for the module but present in the config file, its - value will be used - - otherwise, use the provided `default_value` as fallback - - Args: - key: config[key] will be injected - default_value: fallback value - """ - - def __init__(self, key, default_value=None): - super(SharedConfig, self).__init__() - self.key = key - self.default_value = default_value - - -def extract_schema(cls): - """ - Extract schema from a given class - - Args: - cls (type): Class from which to extract. - - Returns: - schema (SchemaDict): Extracted schema. 
- """ - ctor = cls.__init__ - # python 2 compatibility - if hasattr(inspect, 'getfullargspec'): - argspec = inspect.getfullargspec(ctor) - annotations = argspec.annotations - has_kwargs = argspec.varkw is not None - else: - argspec = inspect.getargspec(ctor) - # python 2 type hinting workaround, see pep-3107 - # however, since `typeguard` does not support python 2, type checking - # is still python 3 only for now - annotations = getattr(ctor, '__annotations__', {}) - has_kwargs = argspec.keywords is not None - - names = [arg for arg in argspec.args if arg != 'self'] - defaults = argspec.defaults - num_defaults = argspec.defaults is not None and len(argspec.defaults) or 0 - num_required = len(names) - num_defaults - - docs = cls.__doc__ - if docs is None and getattr(cls, '__category__', None) == 'op': - docs = cls.__call__.__doc__ - docstring = doc_parse(docs) - if docstring is None: - comments = {} - else: - comments = {} - for p in docstring.params: - match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name) - if match_obj is not None: - comments[match_obj.group(1)] = p.description - - schema = SchemaDict() - schema.name = cls.__name__ - schema.doc = "" - if docs is not None: - start_pos = docs[0] == '\n' and 1 or 0 - schema.doc = docs[start_pos:].split("\n")[0].strip() - # XXX handle paddle's weird doc convention - if '**' == schema.doc[:2] and '**' == schema.doc[-2:]: - schema.doc = schema.doc[2:-2].strip() - schema.category = hasattr(cls, '__category__') and getattr( - cls, '__category__') or 'module' - schema.strict = not has_kwargs - schema.pymodule = importlib.import_module(cls.__module__) - schema.inject = getattr(cls, '__inject__', []) - schema.shared = getattr(cls, '__shared__', []) - for idx, name in enumerate(names): - comment = name in comments and comments[name] or name - if name in schema.inject: - type_ = None - else: - type_ = name in annotations and annotations[name] or None - value_schema = SchemaValue(name, comment, type_) - if name in 
schema.shared: - assert idx >= num_required, "shared config must have default value" - default = defaults[idx - num_required] - value_schema.set_default(SharedConfig(name, default)) - elif idx >= num_required: - default = defaults[idx - num_required] - value_schema.set_default(default) - schema.set_schema(name, value_schema) - - return schema diff --git a/PaddleCV/PaddleDetection/ppdet/core/config/yaml_helpers.py b/PaddleCV/PaddleDetection/ppdet/core/config/yaml_helpers.py deleted file mode 100644 index 8a7738b47f4f86acde78ab8a3bcac590d61615fa..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/core/config/yaml_helpers.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import importlib -import inspect - -import yaml -from .schema import SharedConfig - -__all__ = ['serializable', 'Callable'] - - -def _make_python_constructor(cls): - def python_constructor(loader, node): - if isinstance(node, yaml.SequenceNode): - args = loader.construct_sequence(node, deep=True) - return cls(*args) - else: - kwargs = loader.construct_mapping(node, deep=True) - try: - return cls(**kwargs) - except Exception as ex: - print("Error when construct {} instance from yaml config". 
- format(cls.__name__)) - raise ex - - return python_constructor - - -def _make_python_representer(cls): - # python 2 compatibility - if hasattr(inspect, 'getfullargspec'): - argspec = inspect.getfullargspec(cls) - else: - argspec = inspect.getargspec(cls.__init__) - argnames = [arg for arg in argspec.args if arg != 'self'] - - def python_representer(dumper, obj): - if argnames: - data = {name: getattr(obj, name) for name in argnames} - else: - data = obj.__dict__ - if '_id' in data: - del data['_id'] - return dumper.represent_mapping(u'!{}'.format(cls.__name__), data) - - return python_representer - - -def serializable(cls): - """ - Add loader and dumper for given class, which must be - "trivially serializable" - - Args: - cls: class to be serialized - - Returns: cls - """ - yaml.add_constructor(u'!{}'.format(cls.__name__), - _make_python_constructor(cls)) - yaml.add_representer(cls, _make_python_representer(cls)) - return cls - - -yaml.add_representer(SharedConfig, - lambda d, o: d.represent_data(o.default_value)) - - -@serializable -class Callable(object): - """ - Helper to be used in Yaml for creating arbitrary class objects - - Args: - full_type (str): the full module path to target function - """ - - def __init__(self, full_type, args=[], kwargs={}): - super(Callable, self).__init__() - self.full_type = full_type - self.args = args - self.kwargs = kwargs - - def __call__(self): - if '.' 
in self.full_type: - idx = self.full_type.rfind('.') - module = importlib.import_module(self.full_type[:idx]) - func_name = self.full_type[idx + 1:] - else: - try: - module = importlib.import_module('builtins') - except Exception: - module = importlib.import_module('__builtin__') - func_name = self.full_type - - func = getattr(module, func_name) - return func(*self.args, **self.kwargs) diff --git a/PaddleCV/PaddleDetection/ppdet/core/workspace.py b/PaddleCV/PaddleDetection/ppdet/core/workspace.py deleted file mode 100644 index bf505d6e4d1aab311057763c52f4ef501606a7fb..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/core/workspace.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import importlib -import os -import sys - -import yaml -import copy - -from .config.schema import SchemaDict, SharedConfig, extract_schema -from .config.yaml_helpers import serializable - -__all__ = [ - 'global_config', - 'load_config', - 'merge_config', - 'get_registered_modules', - 'create', - 'register', - 'serializable', - 'dump_value', -] - - -def dump_value(value): - # XXX this is hackish, but collections.abc is not available in python 2 - if hasattr(value, '__dict__') or isinstance(value, (dict, tuple, list)): - value = yaml.dump(value, default_flow_style=True) - value = value.replace('\n', '') - value = value.replace('...', '') - return "'{}'".format(value) - else: - # primitive types - return str(value) - - -class AttrDict(dict): - """Single level attribute dict, NOT recursive""" - - def __init__(self, **kwargs): - super(AttrDict, self).__init__() - super(AttrDict, self).update(kwargs) - - def __getattr__(self, key): - if key in self: - return self[key] - raise AttributeError("object has no attribute '{}'".format(key)) - - -global_config = AttrDict() - - -def load_config(file_path): - """ - Load config from file. - - Args: - file_path (str): Path of the config file to be loaded. - - Returns: global config - """ - _, ext = os.path.splitext(file_path) - assert ext in ['.yml', '.yaml'], "only support yaml files for now" - with open(file_path) as f: - merge_config(yaml.load(f, Loader=yaml.Loader)) - return global_config - - -def merge_config(config): - """ - Merge config into global config. - - Args: - config (dict): Config to be merged. 
- - Returns: global config - """ - for key, value in config.items(): - if isinstance(value, dict) and key in global_config: - global_config[key].update(value) - else: - global_config[key] = value - - -def get_registered_modules(): - return {k: v for k, v in global_config.items() if isinstance(v, SchemaDict)} - - -def make_partial(cls): - op_module = importlib.import_module(cls.__op__.__module__) - op = getattr(op_module, cls.__op__.__name__) - cls.__category__ = getattr(cls, '__category__', None) or 'op' - - def partial_apply(self, *args, **kwargs): - kwargs_ = self.__dict__.copy() - kwargs_.update(kwargs) - return op(*args, **kwargs_) - - if getattr(cls, '__append_doc__', True): # XXX should default to True? - if sys.version_info[0] > 2: - cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__) - cls.__init__.__doc__ = op.__doc__ - cls.__call__ = partial_apply - cls.__call__.__doc__ = op.__doc__ - else: - # XXX work around for python 2 - partial_apply.__doc__ = op.__doc__ - cls.__call__ = partial_apply - return cls - - -def register(cls): - """ - Register a given module class. - - Args: - cls (type): Module class to be registered. - - Returns: cls - """ - if cls.__name__ in global_config: - raise ValueError("Module class already registered: {}".format( - cls.__name__)) - if hasattr(cls, '__op__'): - cls = make_partial(cls) - global_config[cls.__name__] = extract_schema(cls) - return cls - - -def create(cls_or_name, **kwargs): - """ - Create an instance of given module class. - - Args: - cls_or_name (type or str): Class of which to create instance. 
- - Returns: instance of type `cls_or_name` - """ - assert type(cls_or_name) in [type, str - ], "should be a class or name of a class" - name = type(cls_or_name) == str and cls_or_name or cls_or_name.__name__ - assert name in global_config and \ - isinstance(global_config[name], SchemaDict), \ - "the module {} is not registered".format(name) - config = global_config[name] - config.update(kwargs) - config.validate() - cls = getattr(config.pymodule, name) - - kwargs = {} - kwargs.update(global_config[name]) - - # parse `shared` annoation of registered modules - if getattr(config, 'shared', None): - for k in config.shared: - target_key = config[k] - shared_conf = config.schema[k].default - assert isinstance(shared_conf, SharedConfig) - if target_key is not None and not isinstance(target_key, - SharedConfig): - continue # value is given for the module - elif shared_conf.key in global_config: - # `key` is present in config - kwargs[k] = global_config[shared_conf.key] - else: - kwargs[k] = shared_conf.default_value - - # parse `inject` annoation of registered modules - if getattr(config, 'inject', None): - for k in config.inject: - target_key = config[k] - # optional dependency - if target_key is None: - continue - # also accept dictionaries and serialized objects - if isinstance(target_key, dict) or hasattr(target_key, '__dict__'): - continue - elif isinstance(target_key, str): - if target_key not in global_config: - raise ValueError("Missing injection config:", target_key) - target = global_config[target_key] - if isinstance(target, SchemaDict): - kwargs[k] = create(target_key) - elif hasattr(target, '__dict__'): # serialized object - kwargs[k] = target - else: - raise ValueError("Unsupported injection type:", target_key) - # prevent modification of global config values of reference types - # (e.g., list, dict) from within the created module instances - kwargs = copy.deepcopy(kwargs) - return cls(**kwargs) diff --git a/PaddleCV/PaddleDetection/ppdet/data/README.md 
b/PaddleCV/PaddleDetection/ppdet/data/README.md deleted file mode 120000 index 238fc99bf487f0505c27541ecaa9a64b0bcd62f7..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/README.md +++ /dev/null @@ -1 +0,0 @@ -docs/DATA.md \ No newline at end of file diff --git a/PaddleCV/PaddleDetection/ppdet/data/README_cn.md b/PaddleCV/PaddleDetection/ppdet/data/README_cn.md deleted file mode 120000 index c8e59f3054954c6abe6732b01998a87d6d3074c4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/README_cn.md +++ /dev/null @@ -1 +0,0 @@ -docs/DATA_cn.md \ No newline at end of file diff --git a/PaddleCV/PaddleDetection/ppdet/data/__init__.py b/PaddleCV/PaddleDetection/ppdet/data/__init__.py deleted file mode 100644 index 1104c33f6ac34b8ec32681f5c4a7fc4d89274bfb..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# function: -# module to prepare data for detection model training -# -# implementation notes: -# - Dateset -# basic interface to accessing data samples in stream mode -# -# - xxxSource (RoiDbSource) -# * subclass of 'Dataset' -# * load data from local files and other source data -# -# - xxxOperator (DecodeImage) -# * subclass of 'BaseOperator' -# * each op can transform a sample, eg: decode/resize/crop image -# * each op must obey basic rules defined in transform.operator.base -# -# - transformer -# * subclass of 'Dataset' -# * 'MappedDataset' accept a 'xxxSource' and a list of 'xxxOperator' -# to build a transformed 'Dataset' - -from __future__ import absolute_import - -from .dataset import Dataset -from .reader import Reader -import traceback -if traceback.extract_stack()[0][ - 0] == 'ppdet/data/tools/generate_data_for_training.py': - __all__ = ['Dataset', 'Reader'] -else: - from .data_feed import create_reader - __all__ = ['Dataset', 'Reader', 'create_reader'] diff --git a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py b/PaddleCV/PaddleDetection/ppdet/data/data_feed.py deleted file mode 100644 index cbaebc2e4860e40481a8e1defdeea3edde22eb7e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py +++ /dev/null @@ -1,1067 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -import inspect - -from ppdet.core.workspace import register, serializable -from ppdet.utils.download import get_dataset_path - -from ppdet.data.reader import Reader -# XXX these are for triggering the decorator -from ppdet.data.transform.operators import ( - DecodeImage, MixupImage, NormalizeBox, NormalizeImage, RandomDistort, - RandomFlipImage, RandomInterpImage, ResizeImage, ExpandImage, CropImage, - Permute, MultiscaleTestResize) -from ppdet.data.transform.arrange_sample import ( - ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD, - ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO) - -__all__ = [ - 'PadBatch', 'MultiScale', 'RandomShape', 'PadMSTest', 'DataSet', - 'CocoDataSet', 'DataFeed', 'TrainFeed', 'EvalFeed', 'FasterRCNNTrainFeed', - 'MaskRCNNTrainFeed', 'FasterRCNNEvalFeed', 'MaskRCNNEvalFeed', - 'FasterRCNNTestFeed', 'MaskRCNNTestFeed', 'SSDTrainFeed', 'SSDEvalFeed', - 'SSDTestFeed', 'YoloTrainFeed', 'YoloEvalFeed', 'YoloTestFeed', - 'create_reader' -] - - -def _prepare_data_config(feed, args_path): - # if `DATASET_DIR` does not exists, search ~/.paddle/dataset for a directory - # named `DATASET_DIR` (e.g., coco, pascal), if not present either, download - dataset_home = args_path if args_path else feed.dataset.dataset_dir - if dataset_home: - annotation = getattr(feed.dataset, 'annotation', None) - image_dir = getattr(feed.dataset, 'image_dir', None) - dataset_dir = get_dataset_path(dataset_home, annotation, image_dir) - if annotation: - feed.dataset.annotation = os.path.join(dataset_dir, annotation) - if image_dir: - feed.dataset.image_dir = os.path.join(dataset_dir, image_dir) - - mixup_epoch = -1 - if getattr(feed, 'mixup_epoch', None) is not None: - mixup_epoch = feed.mixup_epoch - - data_config = { - 'ANNO_FILE': feed.dataset.annotation, - 'IMAGE_DIR': feed.dataset.image_dir, - 
'USE_DEFAULT_LABEL': feed.dataset.use_default_label, - 'IS_SHUFFLE': feed.shuffle, - 'SAMPLES': feed.samples, - 'WITH_BACKGROUND': feed.with_background, - 'MIXUP_EPOCH': mixup_epoch, - 'TYPE': type(feed.dataset).__source__ - } - - if feed.mode == 'TRAIN': - data_config['CLASS_AWARE_SAMPLING'] = getattr( - feed, 'class_aware_sampling', False) - - if len(getattr(feed.dataset, 'images', [])) > 0: - data_config['IMAGES'] = feed.dataset.images - - return data_config - - -def create_reader(feed, max_iter=0, args_path=None, my_source=None): - """ - Return iterable data reader. - - Args: - max_iter (int): number of iterations. - my_source (callable): callable function to create a source iterator - which is used to provide source data in 'ppdet.data.reader' - """ - - # if `DATASET_DIR` does not exists, search ~/.paddle/dataset for a directory - # named `DATASET_DIR` (e.g., coco, pascal), if not present either, download - data_config = _prepare_data_config(feed, args_path) - - bufsize = getattr(feed, 'bufsize', 10) - use_process = getattr(feed, 'use_process', False) - memsize = getattr(feed, 'memsize', '3G') - transform_config = { - 'WORKER_CONF': { - 'bufsize': bufsize, - 'worker_num': feed.num_workers, - 'use_process': use_process, - 'memsize': memsize - }, - 'BATCH_SIZE': feed.batch_size, - 'DROP_LAST': feed.drop_last, - 'USE_PADDED_IM_INFO': feed.use_padded_im_info, - } - - batch_transforms = feed.batch_transforms - pad = [t for t in batch_transforms if isinstance(t, PadBatch)] - rand_shape = [t for t in batch_transforms if isinstance(t, RandomShape)] - multi_scale = [t for t in batch_transforms if isinstance(t, MultiScale)] - pad_ms_test = [t for t in batch_transforms if isinstance(t, PadMSTest)] - - if any(pad): - transform_config['IS_PADDING'] = True - if pad[0].pad_to_stride != 0: - transform_config['COARSEST_STRIDE'] = pad[0].pad_to_stride - if any(rand_shape): - transform_config['RANDOM_SHAPES'] = rand_shape[0].sizes - if any(multi_scale): - 
transform_config['MULTI_SCALES'] = multi_scale[0].scales - if any(pad_ms_test): - transform_config['ENABLE_MULTISCALE_TEST'] = True - transform_config['NUM_SCALE'] = feed.num_scale - transform_config['COARSEST_STRIDE'] = pad_ms_test[0].pad_to_stride - - if hasattr(inspect, 'getfullargspec'): - argspec = inspect.getfullargspec - else: - argspec = inspect.getargspec - - ops = [] - for op in feed.sample_transforms: - op_dict = op.__dict__.copy() - argnames = [ - arg for arg in argspec(type(op).__init__).args if arg != 'self' - ] - op_dict = {k: v for k, v in op_dict.items() if k in argnames} - op_dict['op'] = op.__class__.__name__ - ops.append(op_dict) - transform_config['OPS'] = ops - - return Reader.create(feed.mode, data_config, transform_config, max_iter, - my_source) - - -# XXX batch transforms are only stubs for now, actually handled by `post_map` -@serializable -class PadBatch(object): - """ - Pad a batch of samples to same dimensions - - Args: - pad_to_stride (int): pad to multiple of strides, e.g., 32 - """ - - def __init__(self, pad_to_stride=0): - super(PadBatch, self).__init__() - self.pad_to_stride = pad_to_stride - - -@serializable -class MultiScale(object): - """ - Randomly resize image by scale - - Args: - scales (list): list of int, randomly resize to one of these scales - """ - - def __init__(self, scales=[]): - super(MultiScale, self).__init__() - self.scales = scales - - -@serializable -class RandomShape(object): - """ - Randomly reshape a batch - - Args: - sizes (list): list of int, random choose a size from these - """ - - def __init__(self, sizes=[]): - super(RandomShape, self).__init__() - self.sizes = sizes - - -@serializable -class PadMSTest(object): - """ - Padding for multi-scale test - - Args: - pad_to_stride (int): pad to multiple of strides, e.g., 32 - """ - - def __init__(self, pad_to_stride=0): - super(PadMSTest, self).__init__() - self.pad_to_stride = pad_to_stride - - -@serializable -class DataSet(object): - """ - Dataset, e.g., 
coco, pascal voc - - Args: - annotation (str): annotation file path - image_dir (str): directory where image files are stored - shuffle (bool): shuffle samples - """ - __source__ = 'RoiDbSource' - - def __init__(self, - annotation, - image_dir=None, - dataset_dir=None, - use_default_label=None): - super(DataSet, self).__init__() - self.dataset_dir = dataset_dir - self.annotation = annotation - self.image_dir = image_dir - self.use_default_label = use_default_label - - -COCO_DATASET_DIR = 'dataset/coco' -COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json' -COCO_TRAIN_IMAGE_DIR = 'train2017' -COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json' -COCO_VAL_IMAGE_DIR = 'val2017' - - -@serializable -class CocoDataSet(DataSet): - def __init__(self, - dataset_dir=COCO_DATASET_DIR, - annotation=COCO_TRAIN_ANNOTATION, - image_dir=COCO_TRAIN_IMAGE_DIR): - super(CocoDataSet, self).__init__( - dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir) - - -VOC_DATASET_DIR = 'dataset/voc' -VOC_TRAIN_ANNOTATION = 'train.txt' -VOC_VAL_ANNOTATION = 'val.txt' -VOC_IMAGE_DIR = None -VOC_USE_DEFAULT_LABEL = True - - -@serializable -class VocDataSet(DataSet): - __source__ = 'VOCSource' - - def __init__(self, - dataset_dir=VOC_DATASET_DIR, - annotation=VOC_TRAIN_ANNOTATION, - image_dir=VOC_IMAGE_DIR, - use_default_label=VOC_USE_DEFAULT_LABEL): - super(VocDataSet, self).__init__( - dataset_dir=dataset_dir, - annotation=annotation, - image_dir=image_dir, - use_default_label=use_default_label) - - -@serializable -class SimpleDataSet(DataSet): - __source__ = 'SimpleSource' - - def __init__(self, - dataset_dir=None, - annotation=None, - image_dir=None, - use_default_label=None): - super(SimpleDataSet, self).__init__( - dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir) - self.images = [] - - def add_images(self, images): - self.images.extend(images) - - -@serializable -class DataFeed(object): - """ - DataFeed encompasses all data loading related 
settings - - Args: - dataset (object): a `Dataset` instance - fields (list): list of data fields needed - image_shape (list): list of image dims (C, MAX_DIM, MIN_DIM) - sample_transforms (list): list of sample transformations to use - batch_transforms (list): list of batch transformations to use - batch_size (int): number of images per device - shuffle (bool): if samples should be shuffled - drop_last (bool): drop last batch if size is uneven - num_workers (int): number of workers processes (or threads) - bufsize (int): size of queue used to buffer results from workers - use_process (bool): use process or thread as workers - memsize (str): size of shared memory used in result queue - when 'use_process' is True, default to '3G' - """ - __category__ = 'data' - - def __init__(self, - dataset, - fields, - image_shape, - sample_transforms=None, - batch_transforms=None, - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - with_background=True, - num_workers=2, - bufsize=10, - use_process=False, - memsize=None, - use_padded_im_info=False, - class_aware_sampling=False): - super(DataFeed, self).__init__() - self.fields = fields - self.image_shape = image_shape - self.sample_transforms = sample_transforms - self.batch_transforms = batch_transforms - self.batch_size = batch_size - self.shuffle = shuffle - self.samples = samples - self.drop_last = drop_last - self.with_background = with_background - self.num_workers = num_workers - self.bufsize = bufsize - self.use_process = use_process - self.memsize = memsize - self.dataset = dataset - self.use_padded_im_info = use_padded_im_info - self.class_aware_sampling = class_aware_sampling - if isinstance(dataset, dict): - self.dataset = DataSet(**dataset) - - -# for custom (i.e., Non-preset) datasets -@register -class TrainFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset, - fields, - image_shape, - sample_transforms=[], - batch_transforms=[], - batch_size=1, - shuffle=True, - samples=-1, - 
drop_last=False, - with_background=True, - num_workers=2, - bufsize=10, - use_process=True, - memsize=None): - super(TrainFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - with_background=with_background, - num_workers=num_workers, - bufsize=bufsize, - use_process=use_process, - memsize=memsize) - - -@register -class EvalFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset, - fields, - image_shape, - sample_transforms=[], - batch_transforms=[], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - with_background=True, - num_workers=2): - super(EvalFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - with_background=with_background, - num_workers=num_workers) - - -@register -class TestFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset, - fields, - image_shape, - sample_transforms=[], - batch_transforms=[], - batch_size=1, - shuffle=False, - drop_last=False, - with_background=True, - num_workers=2): - super(TestFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - drop_last=drop_last, - with_background=with_background, - num_workers=num_workers) - - -# yapf: disable -@register -class FasterRCNNTrainFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet().__dict__, - fields=[ - 'image', 'im_info', 'im_id', 'gt_box', 'gt_label', - 'is_crowd' - ], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - RandomFlipImage(prob=0.5), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - ResizeImage(target_size=800, 
max_size=1333, interp=1), - Permute(to_bgr=False) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=True, - samples=-1, - drop_last=False, - bufsize=10, - num_workers=2, - use_process=False, - memsize=None, - class_aware_sampling=False): - # XXX this should be handled by the data loader, since `fields` is - # given, just collect them - sample_transforms.append(ArrangeRCNN()) - super(FasterRCNNTrainFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - bufsize=bufsize, - num_workers=num_workers, - use_process=use_process, - memsize=memsize, - class_aware_sampling=class_aware_sampling) - # XXX these modes should be unified - self.mode = 'TRAIN' - - -@register -class FasterRCNNEvalFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_info', 'im_id', 'im_shape', 'gt_box', - 'gt_label', 'is_difficult'], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - ResizeImage(target_size=800, max_size=1333, interp=1), - Permute(to_bgr=False) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - num_workers=2, - use_padded_im_info=True, - enable_multiscale=False, - num_scale=1, - enable_aug_flip=False): - sample_transforms.append(ArrangeEvalRCNN()) - super(FasterRCNNEvalFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - use_padded_im_info=use_padded_im_info) - self.mode = 'VAL' - self.enable_multiscale = enable_multiscale - self.num_scale = num_scale - 
self.enable_aug_flip = enable_aug_flip - - -@register -class FasterRCNNTestFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=SimpleDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_info', 'im_id', 'im_shape'], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - Permute(to_bgr=False) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - num_workers=2, - use_padded_im_info=True): - sample_transforms.append(ArrangeTestRCNN()) - if isinstance(dataset, dict): - dataset = SimpleDataSet(**dataset) - super(FasterRCNNTestFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - use_padded_im_info=use_padded_im_info) - self.mode = 'TEST' - - -# XXX currently use two presets, in the future, these should be combined into a -# single `RCNNTrainFeed`. 
Mask (and keypoint) should be processed -# automatically if `gt_mask` (or `gt_keypoints`) is in the required fields -@register -class MaskRCNNTrainFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet().__dict__, - fields=[ - 'image', 'im_info', 'im_id', 'gt_box', 'gt_label', - 'is_crowd', 'gt_mask' - ], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - RandomFlipImage(prob=0.5, is_mask_flip=True), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - ResizeImage(target_size=800, - max_size=1333, - interp=1, - use_cv2=True), - Permute(to_bgr=False, channel_first=True) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=True, - samples=-1, - drop_last=False, - num_workers=2, - use_process=False, - use_padded_im_info=False): - sample_transforms.append(ArrangeRCNN(is_mask=True)) - super(MaskRCNNTrainFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - use_process=use_process) - self.mode = 'TRAIN' - - -@register -class MaskRCNNEvalFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_info', 'im_id', 'im_shape'], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - ResizeImage(target_size=800, - max_size=1333, - interp=1, - use_cv2=True), - Permute(to_bgr=False, channel_first=True) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - num_workers=2, - use_process=False, - use_padded_im_info=True, - enable_multiscale=False, - num_scale=1, - 
enable_aug_flip=False): - sample_transforms.append(ArrangeTestRCNN()) - super(MaskRCNNEvalFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - use_process=use_process, - use_padded_im_info=use_padded_im_info) - self.mode = 'VAL' - self.enable_multiscale = enable_multiscale - self.num_scale = num_scale - self.enable_aug_flip = enable_aug_flip - - -@register -class MaskRCNNTestFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=SimpleDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_info', 'im_id', 'im_shape'], - image_shape=[None, 3, None, None], - sample_transforms=[ - DecodeImage(to_rgb=True), - NormalizeImage( - mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - Permute(to_bgr=False, channel_first=True) - ], - batch_transforms=[PadBatch()], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - num_workers=2, - use_process=False, - use_padded_im_info=True): - sample_transforms.append(ArrangeTestRCNN()) - if isinstance(dataset, dict): - dataset = SimpleDataSet(**dataset) - super(MaskRCNNTestFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - use_process=use_process, - use_padded_im_info=use_padded_im_info) - self.mode = 'TEST' - - -@register -class SSDTrainFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=VocDataSet().__dict__, - fields=['image', 'gt_box', 'gt_label'], - image_shape=[3, 300, 300], - sample_transforms=[ - DecodeImage(to_rgb=True, with_mixup=False), - NormalizeBox(), - RandomDistort(brightness_lower=0.875, - brightness_upper=1.125, - is_order=True), - ExpandImage(max_ratio=4, 
prob=0.5), - CropImage(batch_sampler=[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]], - satisfy_all=False, avoid_no_bbox=False), - ResizeImage(target_size=300, use_cv2=False, interp=1), - RandomFlipImage(is_normalized=True), - Permute(), - NormalizeImage(mean=[127.5, 127.5, 127.5], - std=[127.502231, 127.502231, 127.502231], - is_scale=False) - ], - batch_transforms=[], - batch_size=32, - shuffle=True, - samples=-1, - drop_last=True, - num_workers=8, - bufsize=10, - use_process=True, - memsize=None): - sample_transforms.append(ArrangeSSD()) - super(SSDTrainFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - bufsize=bufsize, - use_process=use_process, - memsize=None) - self.mode = 'TRAIN' - - -@register -class SSDEvalFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__( - self, - dataset=VocDataSet(VOC_VAL_ANNOTATION).__dict__, - fields=['image', 'im_shape', 'im_id', 'gt_box', - 'gt_label', 'is_difficult'], - image_shape=[3, 300, 300], - sample_transforms=[ - DecodeImage(to_rgb=True, with_mixup=False), - NormalizeBox(), - ResizeImage(target_size=300, use_cv2=False, interp=1), - Permute(), - NormalizeImage( - mean=[127.5, 127.5, 127.5], - std=[127.502231, 127.502231, 127.502231], - is_scale=False) - ], - batch_transforms=[], - batch_size=64, - shuffle=False, - samples=-1, - drop_last=True, - num_workers=8, - bufsize=10, - use_process=False, - memsize=None): - sample_transforms.append(ArrangeEvalSSD(fields)) - super(SSDEvalFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - 
samples=samples, - drop_last=drop_last, - num_workers=num_workers, - bufsize=bufsize, - use_process=use_process, - memsize=memsize) - self.mode = 'VAL' - - -@register -class SSDTestFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__, - fields=['image', 'im_id', 'im_shape'], - image_shape=[3, 300, 300], - sample_transforms=[ - DecodeImage(to_rgb=True), - ResizeImage(target_size=300, use_cv2=False, interp=1), - Permute(), - NormalizeImage( - mean=[127.5, 127.5, 127.5], - std=[127.502231, 127.502231, 127.502231], - is_scale=False) - ], - batch_transforms=[], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - num_workers=8, - bufsize=10, - use_process=False, - memsize=None): - sample_transforms.append(ArrangeTestSSD()) - if isinstance(dataset, dict): - dataset = SimpleDataSet(**dataset) - super(SSDTestFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - num_workers=num_workers, - bufsize=bufsize, - use_process=use_process, - memsize=memsize) - self.mode = 'TEST' - - -@register -class YoloTrainFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet().__dict__, - fields=['image', 'gt_box', 'gt_label', 'gt_score'], - image_shape=[3, 608, 608], - sample_transforms=[ - DecodeImage(to_rgb=True, with_mixup=True), - MixupImage(alpha=1.5, beta=1.5), - NormalizeBox(), - RandomDistort(), - ExpandImage(max_ratio=4., prob=.5, - mean=[123.675, 116.28, 103.53]), - CropImage([[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]), - RandomInterpImage(target_size=608), - RandomFlipImage(is_normalized=True), - 
NormalizeImage( - mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - Permute(to_bgr=False), - ], - batch_transforms=[ - RandomShape(sizes=[ - 320, 352, 384, 416, 448, 480, 512, 544, 576, 608 - ]) - ], - batch_size=8, - shuffle=True, - samples=-1, - drop_last=True, - with_background=False, - num_workers=8, - bufsize=128, - use_process=True, - memsize=None, - num_max_boxes=50, - mixup_epoch=250, - class_aware_sampling=False): - sample_transforms.append(ArrangeYOLO()) - super(YoloTrainFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - with_background=with_background, - num_workers=num_workers, - bufsize=bufsize, - use_process=use_process, - memsize=memsize, - class_aware_sampling=class_aware_sampling) - self.num_max_boxes = num_max_boxes - self.mixup_epoch = mixup_epoch - self.mode = 'TRAIN' - - -@register -class YoloEvalFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=CocoDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_size', 'im_id', 'gt_box', - 'gt_label', 'is_difficult'], - image_shape=[3, 608, 608], - sample_transforms=[ - DecodeImage(to_rgb=True), - ResizeImage(target_size=608, interp=2), - NormalizeImage( - mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - Permute(to_bgr=False), - ], - batch_transforms=[], - batch_size=8, - shuffle=False, - samples=-1, - drop_last=False, - with_background=False, - num_workers=8, - num_max_boxes=50, - use_process=False, - memsize=None): - sample_transforms.append(ArrangeEvalYOLO()) - super(YoloEvalFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - with_background=with_background, - 
num_workers=num_workers, - use_process=use_process, - memsize=memsize) - self.num_max_boxes = num_max_boxes - self.mode = 'VAL' - self.bufsize = 128 - - # support image shape config, resize image with image_shape - for i, trans in enumerate(sample_transforms): - if isinstance(trans, ResizeImage): - sample_transforms[i] = ResizeImage( - target_size=self.image_shape[-1], - interp=trans.interp) - - -@register -class YoloTestFeed(DataFeed): - __doc__ = DataFeed.__doc__ - - def __init__(self, - dataset=SimpleDataSet(COCO_VAL_ANNOTATION, - COCO_VAL_IMAGE_DIR).__dict__, - fields=['image', 'im_size', 'im_id'], - image_shape=[3, 608, 608], - sample_transforms=[ - DecodeImage(to_rgb=True), - ResizeImage(target_size=608, interp=2), - NormalizeImage(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225], - is_scale=True, - is_channel_first=False), - Permute(to_bgr=False), - ], - batch_transforms=[], - batch_size=1, - shuffle=False, - samples=-1, - drop_last=False, - with_background=False, - num_workers=8, - num_max_boxes=50, - use_process=False, - memsize=None): - sample_transforms.append(ArrangeTestYOLO()) - if isinstance(dataset, dict): - dataset = SimpleDataSet(**dataset) - super(YoloTestFeed, self).__init__( - dataset, - fields, - image_shape, - sample_transforms, - batch_transforms, - batch_size=batch_size, - shuffle=shuffle, - samples=samples, - drop_last=drop_last, - with_background=with_background, - num_workers=num_workers, - use_process=use_process, - memsize=memsize) - self.mode = 'TEST' - self.bufsize = 128 - - # support image shape config, resize image with image_shape - for i, trans in enumerate(sample_transforms): - if isinstance(trans, ResizeImage): - sample_transforms[i] = ResizeImage( - target_size=self.image_shape[-1], - interp=trans.interp) -# yapf: enable diff --git a/PaddleCV/PaddleDetection/ppdet/data/dataset.py b/PaddleCV/PaddleDetection/ppdet/data/dataset.py deleted file mode 100644 index 
31d4df4a0146fd7bc5cdd21d02e06d58ebe0f99f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/dataset.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# interface for accessing data samples in stream - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -class Dataset(object): - """interface to access a stream of data samples""" - - def __init__(self): - self._epoch = -1 - - def __next__(self): - return self.next() - - def __iter__(self): - return self - - def __str__(self): - return "{}(fname:{}, epoch:{:d}, size:{:d}, pos:{:d})".format( - type(self).__name__, self._fname, self._epoch, - self.size(), self._pos) - - def next(self): - """get next sample""" - raise NotImplementedError('%s.next not available' % - (self.__class__.__name__)) - - def reset(self): - """reset to initial status and begins a new epoch""" - raise NotImplementedError('%s.reset not available' % - (self.__class__.__name__)) - - def size(self): - """get number of samples in this dataset""" - raise NotImplementedError('%s.size not available' % - (self.__class__.__name__)) - - def drained(self): - """whether all sampled has been readed out for this epoch""" - raise NotImplementedError('%s.drained not available' % - (self.__class__.__name__)) - - def epoch_id(self): - """return epoch id for latest 
sample""" - raise NotImplementedError('%s.epoch_id not available' % - (self.__class__.__name__)) diff --git a/PaddleCV/PaddleDetection/ppdet/data/reader.py b/PaddleCV/PaddleDetection/ppdet/data/reader.py deleted file mode 100644 index b2d4d07850601fb047b7b17fcb7c33ce6f35f6ea..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/reader.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# function: -# Interface to build readers for detection data like COCO or VOC -# - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from numbers import Integral - -import logging -from .source import build_source -from .transform import build_mapper, map, batch, batch_map - -logger = logging.getLogger(__name__) - - -class Reader(object): - """Interface to make readers for training or evaluation""" - - def __init__(self, data_cf, trans_conf, maxiter=-1): - self._data_cf = data_cf - self._trans_conf = trans_conf - self._maxiter = maxiter - self._cname2cid = None - assert isinstance(self._maxiter, Integral), "maxiter should be int" - - def _make_reader(self, mode, my_source=None): - """Build reader for training or validation""" - if my_source is None: - file_conf = self._data_cf[mode] - - # 1, Build data source - - sc_conf = {'data_cf': file_conf, 'cname2cid': self._cname2cid} - sc = build_source(sc_conf) - else: - sc = my_source - - # 2, Buid a transformed dataset - ops = self._trans_conf[mode]['OPS'] - batchsize = self._trans_conf[mode]['BATCH_SIZE'] - drop_last = False if 'DROP_LAST' not in \ - self._trans_conf[mode] else self._trans_conf[mode]['DROP_LAST'] - - mapper = build_mapper(ops, {'is_train': mode == 'TRAIN'}) - - worker_args = None - if 'WORKER_CONF' in self._trans_conf[mode]: - worker_args = self._trans_conf[mode]['WORKER_CONF'] - worker_args = {k.lower(): v for k, v in worker_args.items()} - - mapped_ds = map(sc, mapper, worker_args) - # In VAL mode, gt_bbox, gt_label can be empty, and should - # not be dropped - batched_ds = batch( - mapped_ds, batchsize, drop_last, drop_empty=(mode != "VAL")) - - trans_conf = {k.lower(): v for k, v in self._trans_conf[mode].items()} - need_keys = { - 'is_padding', - 'coarsest_stride', - 'random_shapes', - 'multi_scales', - 'use_padded_im_info', - 'enable_multiscale_test', - 'num_scale', - } - bm_config = { - 
key: value - for key, value in trans_conf.items() if key in need_keys - } - - batched_ds = batch_map(batched_ds, bm_config) - - batched_ds.reset() - if mode.lower() == 'train': - if self._cname2cid is not None: - logger.warn('cname2cid already set, it will be overridden') - self._cname2cid = getattr(sc, 'cname2cid', None) - - # 3, Build a reader - maxit = -1 if self._maxiter <= 0 else self._maxiter - - def _reader(): - n = 0 - while True: - for _batch in batched_ds: - yield _batch - n += 1 - if maxit > 0 and n == maxit: - return - batched_ds.reset() - if maxit <= 0: - return - - if hasattr(sc, 'get_imid2path'): - _reader.imid2path = sc.get_imid2path() - - return _reader - - def train(self): - """Build reader for training""" - return self._make_reader('TRAIN') - - def val(self): - """Build reader for validation""" - return self._make_reader('VAL') - - def test(self): - """Build reader for inference""" - return self._make_reader('TEST') - - @classmethod - def create(cls, - mode, - data_config, - transform_config, - max_iter=-1, - my_source=None, - ret_iter=True): - """ create a specific reader """ - reader = Reader({mode: data_config}, {mode: transform_config}, max_iter) - if ret_iter: - return reader._make_reader(mode, my_source) - else: - return reader diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/__init__.py b/PaddleCV/PaddleDetection/ppdet/data/source/__init__.py deleted file mode 100644 index e55df6962b36906edac71e8b3cb25334a1d336a4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/__init__.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy - -from .roidb_source import RoiDbSource -from .simple_source import SimpleSource -from .iterator_source import IteratorSource -from .class_aware_sampling_roidb_source import ClassAwareSamplingRoiDbSource - - -def build_source(config): - """ - Build dataset from source data, default source type is 'RoiDbSource' - Args: - config (dict): should have following structure: - { - data_cf (dict): - anno_file (str): label file or image list file path - image_dir (str): root directory for images - samples (int): number of samples to load, -1 means all - is_shuffle (bool): should samples be shuffled - load_img (bool): should images be loaded - mixup_epoch (int): parse mixup in first n epoch - with_background (bool): whether load background as a class - cname2cid (dict): the label name to id dictionary - } - """ - if 'data_cf' in config: - data_cf = config['data_cf'] - data_cf['cname2cid'] = config['cname2cid'] - else: - data_cf = config - - data_cf = {k.lower(): v for k, v in data_cf.items()} - - args = copy.deepcopy(data_cf) - # defaut type is 'RoiDbSource' - source_type = 'RoiDbSource' - if 'type' in data_cf: - if data_cf['type'] in ['VOCSource', 'COCOSource', 'RoiDbSource']: - if 'class_aware_sampling' in args and args['class_aware_sampling']: - source_type = 'ClassAwareSamplingRoiDbSource' - else: - source_type = 'RoiDbSource' - if 'class_aware_sampling' in args: - del args['class_aware_sampling'] - else: - source_type = data_cf['type'] 
- del args['type'] - if source_type == 'RoiDbSource': - return RoiDbSource(**args) - elif source_type == 'SimpleSource': - return SimpleSource(**args) - elif source_type == 'ClassAwareSamplingRoiDbSource': - return ClassAwareSamplingRoiDbSource(**args) - else: - raise ValueError('source type not supported: ' + source_type) diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/class_aware_sampling_roidb_source.py b/PaddleCV/PaddleDetection/ppdet/data/source/class_aware_sampling_roidb_source.py deleted file mode 100644 index 0175037c352594c48cce09ca033de18534937f87..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/class_aware_sampling_roidb_source.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#function: -# interface to load data from local files and parse it for samples, -# eg: roidb data in pickled files - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import random - -import copy -import collections -import pickle as pkl -import numpy as np -from .roidb_source import RoiDbSource - - -class ClassAwareSamplingRoiDbSource(RoiDbSource): - """ interface to load class aware sampling roidb data from files - """ - - def __init__(self, - anno_file, - image_dir=None, - samples=-1, - is_shuffle=True, - load_img=False, - cname2cid=None, - use_default_label=None, - mixup_epoch=-1, - with_background=True): - """ Init - - Args: - fname (str): label file path - image_dir (str): root dir for images - samples (int): samples to load, -1 means all - is_shuffle (bool): whether to shuffle samples - load_img (bool): whether load data in this class - cname2cid (dict): the label name to id dictionary - use_default_label (bool):whether use the default mapping of label to id - mixup_epoch (int): parse mixup in first n epoch - with_background (bool): whether load background - as a class - """ - super(ClassAwareSamplingRoiDbSource, self).__init__( - anno_file=anno_file, - image_dir=image_dir, - samples=samples, - is_shuffle=is_shuffle, - load_img=load_img, - cname2cid=cname2cid, - use_default_label=use_default_label, - mixup_epoch=mixup_epoch, - with_background=with_background) - self._img_weights = None - - def __str__(self): - return 'ClassAwareSamplingRoidbSource(fname:%s,epoch:%d,size:%d)' \ - % (self._fname, self._epoch, self.size()) - - def next(self): - """ load next sample - """ - if self._epoch < 0: - self.reset() - - _pos = np.random.choice( - self._samples, 1, replace=False, p=self._img_weights)[0] - sample = copy.deepcopy(self._roidb[_pos]) - - if self._load_img: - sample['image'] = self._load_image(sample['im_file']) - else: - sample['im_file'] = 
os.path.join(self._image_dir, sample['im_file']) - - return sample - - def _calc_img_weights(self): - """ calculate the probabilities of each sample - """ - imgs_cls = [] - num_per_cls = {} - img_weights = [] - for i, roidb in enumerate(self._roidb): - img_cls = set( - [k for cls in self._roidb[i]['gt_class'] for k in cls]) - imgs_cls.append(img_cls) - for c in img_cls: - if c not in num_per_cls: - num_per_cls[c] = 1 - else: - num_per_cls[c] += 1 - - for i in range(len(self._roidb)): - weights = 0 - for c in imgs_cls[i]: - weights += 1 / num_per_cls[c] - img_weights.append(weights) - # Probabilities sum to 1 - img_weights = img_weights / np.sum(img_weights) - return img_weights - - def reset(self): - """ implementation of Dataset.reset - """ - if self._roidb is None: - self._roidb = self._load() - - if self._img_weights is None: - self._img_weights = self._calc_img_weights() - - self._samples = len(self._roidb) - - if self._epoch < 0: - self._epoch = 0 diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/coco_loader.py b/PaddleCV/PaddleDetection/ppdet/data/source/coco_loader.py deleted file mode 100644 index db18498905358eef66b07969dab8f65606d3cdc0..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/coco_loader.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -from pycocotools.coco import COCO - -import logging -logger = logging.getLogger(__name__) - - -def load(anno_path, sample_num=-1, with_background=True): - """ - Load COCO records with annotations in json file 'anno_path' - - Args: - anno_path (str): json file path - sample_num (int): number of samples to load, -1 means all - with_background (bool): whether load background as a class. - if True, total class number will - be 81. default True - - Returns: - (records, cname2cid) - 'records' is list of dict whose structure is: - { - 'im_file': im_fname, # image file name - 'im_id': img_id, # image id - 'h': im_h, # height of image - 'w': im_w, # width - 'is_crowd': is_crowd, - 'gt_score': gt_score, - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - 'gt_poly': gt_poly, - } - 'cname2cid' is a dict used to map category name to class id - """ - assert anno_path.endswith('.json'), 'invalid coco annotation file: ' \ - + anno_path - coco = COCO(anno_path) - img_ids = coco.getImgIds() - cat_ids = coco.getCatIds() - records = [] - ct = 0 - - # when with_background = True, mapping category to classid, like: - # background:0, first_class:1, second_class:2, ... 
- catid2clsid = dict( - {catid: i + int(with_background) - for i, catid in enumerate(cat_ids)}) - cname2cid = dict({ - coco.loadCats(catid)[0]['name']: clsid - for catid, clsid in catid2clsid.items() - }) - - for img_id in img_ids: - img_anno = coco.loadImgs(img_id)[0] - im_fname = img_anno['file_name'] - im_w = float(img_anno['width']) - im_h = float(img_anno['height']) - - ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False) - instances = coco.loadAnns(ins_anno_ids) - - bboxes = [] - for inst in instances: - x, y, box_w, box_h = inst['bbox'] - x1 = max(0, x) - y1 = max(0, y) - x2 = min(im_w - 1, x1 + max(0, box_w - 1)) - y2 = min(im_h - 1, y1 + max(0, box_h - 1)) - if inst['area'] > 0 and x2 >= x1 and y2 >= y1: - inst['clean_bbox'] = [x1, y1, x2, y2] - bboxes.append(inst) - else: - logger.warn( - 'Found an invalid bbox in annotations: im_id: {}, area: {} x1: {}, y1: {}, x2: {}, y2: {}.'. - format(img_id, float(inst['area']), x1, y1, x2, y2)) - num_bbox = len(bboxes) - - gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) - gt_class = np.zeros((num_bbox, 1), dtype=np.int32) - gt_score = np.ones((num_bbox, 1), dtype=np.float32) - is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) - difficult = np.zeros((num_bbox, 1), dtype=np.int32) - gt_poly = [None] * num_bbox - - for i, box in enumerate(bboxes): - catid = box['category_id'] - gt_class[i][0] = catid2clsid[catid] - gt_bbox[i, :] = box['clean_bbox'] - is_crowd[i][0] = box['iscrowd'] - if 'segmentation' in box: - gt_poly[i] = box['segmentation'] - - coco_rec = { - 'im_file': im_fname, - 'im_id': np.array([img_id]), - 'h': im_h, - 'w': im_w, - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - 'gt_score': gt_score, - 'gt_poly': gt_poly, - 'difficult': difficult - } - - logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format( - im_fname, img_id, im_h, im_w)) - records.append(coco_rec) - ct += 1 - if sample_num > 0 and ct >= sample_num: - break - assert len(records) > 0, 'not found any 
coco record in %s' % (anno_path) - logger.info('{} samples in file {}'.format(ct, anno_path)) - return records, cname2cid diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/iterator_source.py b/PaddleCV/PaddleDetection/ppdet/data/source/iterator_source.py deleted file mode 100644 index 2785d4843e660843b554197a82530c8129244321..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/iterator_source.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import copy - -import logging -logger = logging.getLogger(__name__) - -from ..dataset import Dataset - - -class IteratorSource(Dataset): - """ - Load data samples from iterator in stream mode - - Args: - iter_maker (callable): callable function to generate a iter - samples (int): number of samples to load, -1 means all - """ - - def __init__(self, - iter_maker, - samples=-1, - **kwargs): - super(IteratorSource, self).__init__() - self._epoch = -1 - - self._iter_maker = iter_maker - self._data_iter = None - self._pos = -1 - self._drained = False - self._samples = samples - self._sample_num = -1 - - def next(self): - if self._epoch < 0: - self.reset() - - if self._data_iter is not None: - try: - sample = next(self._data_iter) - self._pos += 1 - ret = sample - except StopIteration as e: - if self._sample_num <= 0: - self._sample_num = self._pos - elif self._sample_num != self._pos: - logger.info('num of loaded samples is different ' - 'with previouse setting[prev:%d,now:%d]' % (self._sample_num, self._pos)) - self._sample_num = self._pos - - self._data_iter = None - self._drained = True - raise e - else: - raise StopIteration("no more data in " + str(self)) - - if self._samples > 0 and self._pos >= self._samples: - self._data_iter = None - self._drained = True - raise StopIteration("no more data in " + str(self)) - else: - return ret - - def reset(self): - if self._data_iter is None: - self._data_iter = self._iter_maker() - - if self._epoch < 0: - self._epoch = 0 - else: - self._epoch += 1 - - self._pos = 0 - self._drained = False - - def size(self): - return self._sample_num - - def drained(self): - assert self._epoch >= 0, "the first epoch has not started yet" - return self._pos >= self.size() - - def epoch_id(self): - return self._epoch - diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/loader.py 
b/PaddleCV/PaddleDetection/ppdet/data/source/loader.py deleted file mode 100644 index bc73c8ea67b3bd82ca38ba4a65fd7a185a0c86b1..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/loader.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# load data records from local files(maybe in COCO or VOC data formats) - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os - -import numpy as np -import logging -import pickle as pkl - -logger = logging.getLogger(__name__) - - -def check_records(records): - """ check the fields of 'records' must contains some keys - """ - needed_fields = [ - 'im_file', 'im_id', 'h', 'w', 'is_crowd', 'gt_class', 'gt_bbox', - 'gt_poly' - ] - - for i, rec in enumerate(records): - for k in needed_fields: - assert k in rec, 'not found field[%s] in record[%d]' % (k, i) - - -def load_roidb(anno_file, sample_num=-1): - """ load normalized data records from file - 'anno_file' which is a pickled file. 
- And the records should has a structure: - { - 'im_file': str, # image file name - 'im_id': int, # image id - 'h': int, # height of image - 'w': int, # width of image - 'is_crowd': bool, - 'gt_class': list of np.ndarray, # classids info - 'gt_bbox': list of np.ndarray, # bounding box info - 'gt_poly': list of int, # poly info - } - - Args: - anno_file (str): file name for picked records - sample_num (int): number of samples to load - - Returns: - list of records for detection model training - """ - - assert anno_file.endswith('.roidb'), 'invalid roidb file[%s]' % (anno_file) - with open(anno_file, 'rb') as f: - roidb = f.read() - # for support python3 and python2 - try: - records, cname2cid = pkl.loads(roidb, encoding='bytes') - except: - records, cname2cid = pkl.loads(roidb) - - assert type(records) is list, 'invalid data type from roidb' - - if sample_num > 0 and sample_num < len(records): - records = records[:sample_num] - - return records, cname2cid - - -def load(fname, - samples=-1, - with_background=True, - with_cat2id=False, - use_default_label=None, - cname2cid=None): - """ Load data records from 'fnames' - - Args: - fnames (str): file name for data record, eg: - instances_val2017.json or COCO17_val2017.roidb - samples (int): number of samples to load, default to all - with_background (bool): whether load background as a class. - default True. 
- with_cat2id (bool): whether return cname2cid info out - use_default_label (bool): whether use the default mapping of label to id - cname2cid (dict): the mapping of category name to id - - Returns: - list of loaded records whose structure is: - { - 'im_file': str, # image file name - 'im_id': int, # image id - 'h': int, # height of image - 'w': int, # width of image - 'is_crowd': bool, - 'gt_class': list of np.ndarray, # classids info - 'gt_bbox': list of np.ndarray, # bounding box info - 'gt_poly': list of int, # poly info - } - - """ - - if fname.endswith('.roidb'): - records, cname2cid = load_roidb(fname, samples) - elif fname.endswith('.json'): - from . import coco_loader - records, cname2cid = coco_loader.load(fname, samples, with_background) - elif "wider_face" in fname: - from . import widerface_loader - records = widerface_loader.load(fname, samples) - return records - elif os.path.isfile(fname): - from . import voc_loader - if use_default_label is None or cname2cid is not None: - records, cname2cid = voc_loader.get_roidb(fname, samples, cname2cid, - with_background=with_background) - else: - records, cname2cid = voc_loader.load(fname, samples, - use_default_label, - with_background=with_background) - else: - raise ValueError('invalid file type when load data from file[%s]' % - (fname)) - check_records(records) - if with_cat2id: - return records, cname2cid - else: - return records diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py b/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py deleted file mode 100644 index 7d42e87b898010235f794ffdc3d6b3e4ee2adc24..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#function: -# interface to load data from local files and parse it for samples, -# eg: roidb data in pickled files - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import random - -import copy -import pickle as pkl -from ..dataset import Dataset - - -class RoiDbSource(Dataset): - """ interface to load roidb data from files - """ - - def __init__(self, - anno_file, - image_dir=None, - samples=-1, - is_shuffle=True, - load_img=False, - cname2cid=None, - use_default_label=None, - mixup_epoch=-1, - with_background=True): - """ Init - - Args: - fname (str): label file path - image_dir (str): root dir for images - samples (int): samples to load, -1 means all - is_shuffle (bool): whether to shuffle samples - load_img (bool): whether load data in this class - cname2cid (dict): the label name to id dictionary - use_default_label (bool):whether use the default mapping of label to id - mixup_epoch (int): parse mixup in first n epoch - with_background (bool): whether load background - as a class - """ - super(RoiDbSource, self).__init__() - self._epoch = -1 - assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \ - 'anno_file {} is not a file or a directory'.format(anno_file) - self._fname = anno_file - self._image_dir = image_dir if image_dir is not None else '' - if image_dir is not None: - assert os.path.isdir(image_dir), \ - 'image_dir {} is not a directory'.format(image_dir) - self._roidb = None - self._pos = -1 - self._drained = 
False - self._samples = samples - self._is_shuffle = is_shuffle - self._load_img = load_img - self.use_default_label = use_default_label - self._mixup_epoch = mixup_epoch - self._with_background = with_background - self.cname2cid = cname2cid - self._imid2path = None - - def __str__(self): - return 'RoiDbSource(fname:%s,epoch:%d,size:%d,pos:%d)' \ - % (self._fname, self._epoch, self.size(), self._pos) - - def next(self): - """ load next sample - """ - if self._epoch < 0: - self.reset() - if self._pos >= self._samples: - self._drained = True - raise StopIteration('%s no more data' % (str(self))) - sample = copy.deepcopy(self._roidb[self._pos]) - if self._load_img: - sample['image'] = self._load_image(sample['im_file']) - else: - sample['im_file'] = os.path.join(self._image_dir, sample['im_file']) - - if self._epoch < self._mixup_epoch: - mix_idx = random.randint(1, self._samples - 1) - mix_pos = (mix_idx + self._pos) % self._samples - sample['mixup'] = copy.deepcopy(self._roidb[mix_pos]) - if self._load_img: - sample['mixup']['image'] = \ - self._load_image(sample['mixup']['im_file']) - else: - sample['mixup']['im_file'] = \ - os.path.join(self._image_dir, sample['mixup']['im_file']) - self._pos += 1 - return sample - - def _load(self): - """ load data from file - """ - from . 
import loader - records, cname2cid = loader.load(self._fname, self._samples, - self._with_background, True, - self.use_default_label, self.cname2cid) - self.cname2cid = cname2cid - return records - - def _load_image(self, where): - fn = os.path.join(self._image_dir, where) - with open(fn, 'rb') as f: - return f.read() - - def reset(self): - """ implementation of Dataset.reset - """ - if self._roidb is None: - self._roidb = self._load() - - self._samples = len(self._roidb) - if self._is_shuffle: - random.shuffle(self._roidb) - - if self._epoch < 0: - self._epoch = 0 - else: - self._epoch += 1 - - self._pos = 0 - self._drained = False - - def size(self): - """ implementation of Dataset.size - """ - return len(self._roidb) - - def drained(self): - """ implementation of Dataset.drained - """ - assert self._epoch >= 0, 'The first epoch has not begin!' - return self._pos >= self.size() - - def epoch_id(self): - """ return epoch id for latest sample - """ - return self._epoch - - def get_imid2path(self): - """return image id to image path map""" - if self._imid2path is None: - self._imid2path = {} - for record in self._roidb: - im_id = record['im_id'] - im_id = im_id if isinstance(im_id, int) else im_id[0] - im_path = os.path.join(self._image_dir, record['im_file']) - self._imid2path[im_id] = im_path - return self._imid2path diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/simple_source.py b/PaddleCV/PaddleDetection/ppdet/data/source/simple_source.py deleted file mode 100644 index a65dd054e369d97b303b9018facac3381440dfec..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/simple_source.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# interface to load data from txt file. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np -import copy -from ..dataset import Dataset - - -class SimpleSource(Dataset): - """ - Load image files for testing purpose - - Args: - images (list): list of path of images - samples (int): number of samples to load, -1 means all - load_img (bool): should images be loaded - """ - - def __init__(self, - images=[], - samples=-1, - load_img=True, - **kwargs): - super(SimpleSource, self).__init__() - self._epoch = -1 - for image in images: - assert image != '' and os.path.isfile(image), \ - "Image {} not found".format(image) - self._images = images - self._fname = None - self._simple = None - self._pos = -1 - self._drained = False - self._samples = samples - self._load_img = load_img - self._imid2path = {} - - def next(self): - if self._epoch < 0: - self.reset() - - if self._pos >= self.size(): - self._drained = True - raise StopIteration("no more data in " + str(self)) - else: - sample = copy.deepcopy(self._simple[self._pos]) - if self._load_img: - sample['image'] = self._load_image(sample['im_file']) - - self._pos += 1 - return sample - - def _load(self): - ct = 0 - records = [] - for image in self._images: - if self._samples > 0 and ct >= self._samples: - break - rec = {'im_id': np.array([ct]), 'im_file': image} - self._imid2path[ct] = image - ct += 1 - records.append(rec) - assert len(records) > 0, "no image file found" - return records - - def 
_load_image(self, where): - with open(where, 'rb') as f: - return f.read() - - def reset(self): - if self._simple is None: - self._simple = self._load() - - if self._epoch < 0: - self._epoch = 0 - else: - self._epoch += 1 - - self._pos = 0 - self._drained = False - - def size(self): - return len(self._simple) - - def drained(self): - assert self._epoch >= 0, "the first epoch has not started yet" - return self._pos >= self.size() - - def epoch_id(self): - return self._epoch - - def get_imid2path(self): - """return image id to image path map""" - return self._imid2path diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py b/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py deleted file mode 100644 index 8fc1b79547b8e70b612e956c9c0ac3b1427f5b19..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import numpy as np - -import xml.etree.ElementTree as ET - - -def get_roidb(anno_path, - sample_num=-1, - cname2cid=None, - with_background=True): - """ - Load VOC records with annotations in xml directory 'anno_path' - - Notes: - ${anno_path} must contains xml file and image file path for annotations - - Args: - anno_path (str): root directory for voc annotation data - sample_num (int): number of samples to load, -1 means all - cname2cid (dict): the label name to id dictionary - with_background (bool): whether load background as a class. - if True, total class number will - be 81. default True - - Returns: - (records, catname2clsid) - 'records' is list of dict whose structure is: - { - 'im_file': im_fname, # image file name - 'im_id': im_id, # image id - 'h': im_h, # height of image - 'w': im_w, # width - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - 'gt_poly': gt_poly, - } - 'cname2id' is a dict to map category name to class id - """ - - data_dir = os.path.dirname(anno_path) - - records = [] - ct = 0 - existence = False if cname2cid is None else True - if cname2cid is None: - cname2cid = {} - - # mapping category name to class id - # background:0, first_class:1, second_class:2, ... 
- with open(anno_path, 'r') as fr: - while True: - line = fr.readline() - if not line: - break - img_file, xml_file = [os.path.join(data_dir, x) \ - for x in line.strip().split()[:2]] - if not os.path.isfile(xml_file): - continue - tree = ET.parse(xml_file) - if tree.find('id') is None: - im_id = np.array([ct]) - else: - im_id = np.array([int(tree.find('id').text)]) - - objs = tree.findall('object') - im_w = float(tree.find('size').find('width').text) - im_h = float(tree.find('size').find('height').text) - gt_bbox = np.zeros((len(objs), 4), dtype=np.float32) - gt_class = np.zeros((len(objs), 1), dtype=np.int32) - gt_score = np.ones((len(objs), 1), dtype=np.float32) - is_crowd = np.zeros((len(objs), 1), dtype=np.int32) - difficult = np.zeros((len(objs), 1), dtype=np.int32) - for i, obj in enumerate(objs): - cname = obj.find('name').text - if not existence and cname not in cname2cid: - # the background's id is 0, so need to add 1. - cname2cid[cname] = len(cname2cid) + int(with_background) - elif existence and cname not in cname2cid: - raise KeyError( - 'Not found cname[%s] in cname2cid when map it to cid.' 
% - (cname)) - gt_class[i][0] = cname2cid[cname] - _difficult = int(obj.find('difficult').text) - x1 = float(obj.find('bndbox').find('xmin').text) - y1 = float(obj.find('bndbox').find('ymin').text) - x2 = float(obj.find('bndbox').find('xmax').text) - y2 = float(obj.find('bndbox').find('ymax').text) - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(im_w - 1, x2) - y2 = min(im_h - 1, y2) - gt_bbox[i] = [x1, y1, x2, y2] - is_crowd[i][0] = 0 - difficult[i][0] = _difficult - voc_rec = { - 'im_file': img_file, - 'im_id': im_id, - 'h': im_h, - 'w': im_w, - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_score': gt_score, - 'gt_bbox': gt_bbox, - 'gt_poly': [], - 'difficult': difficult - } - if len(objs) != 0: - records.append(voc_rec) - - ct += 1 - if sample_num > 0 and ct >= sample_num: - break - assert len(records) > 0, 'not found any voc record in %s' % (anno_path) - return [records, cname2cid] - - -def load(anno_path, - sample_num=-1, - use_default_label=True, - with_background=True): - """ - Load VOC records with annotations in - xml directory 'anno_path' - - Notes: - ${anno_path} must contains xml file and image file path for annotations - - Args: - @anno_path (str): root directory for voc annotation data - @sample_num (int): number of samples to load, -1 means all - @use_default_label (bool): whether use the default mapping of label to id - @with_background (bool): whether load background as a class. - if True, total class number will - be 81. 
default True - - Returns: - (records, catname2clsid) - 'records' is list of dict whose structure is: - { - 'im_file': im_fname, # image file name - 'im_id': im_id, # image id - 'h': im_h, # height of image - 'w': im_w, # width - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - 'gt_poly': gt_poly, - } - 'cname2id' is a dict to map category name to class id - """ - - data_dir = os.path.dirname(anno_path) - - # mapping category name to class id - # if with_background is True: - # background:0, first_class:1, second_class:2, ... - # if with_background is False: - # first_class:0, second_class:1, ... - records = [] - ct = 0 - cname2cid = {} - if not use_default_label: - label_path = os.path.join(data_dir, 'label_list.txt') - with open(label_path, 'r') as fr: - label_id = int(with_background) - for line in fr.readlines(): - cname2cid[line.strip()] = label_id - label_id += 1 - else: - cname2cid = pascalvoc_label(with_background) - - with open(anno_path, 'r') as fr: - while True: - line = fr.readline() - if not line: - break - img_file, xml_file = [os.path.join(data_dir, x) \ - for x in line.strip().split()[:2]] - if not os.path.isfile(xml_file): - continue - tree = ET.parse(xml_file) - if tree.find('id') is None: - im_id = np.array([ct]) - else: - im_id = np.array([int(tree.find('id').text)]) - - objs = tree.findall('object') - im_w = float(tree.find('size').find('width').text) - im_h = float(tree.find('size').find('height').text) - gt_bbox = np.zeros((len(objs), 4), dtype=np.float32) - gt_class = np.zeros((len(objs), 1), dtype=np.int32) - gt_score = np.ones((len(objs), 1), dtype=np.float32) - is_crowd = np.zeros((len(objs), 1), dtype=np.int32) - difficult = np.zeros((len(objs), 1), dtype=np.int32) - for i, obj in enumerate(objs): - cname = obj.find('name').text - gt_class[i][0] = cname2cid[cname] - _difficult = int(obj.find('difficult').text) - x1 = float(obj.find('bndbox').find('xmin').text) - y1 = float(obj.find('bndbox').find('ymin').text) - x2 = 
float(obj.find('bndbox').find('xmax').text) - y2 = float(obj.find('bndbox').find('ymax').text) - x1 = max(0, x1) - y1 = max(0, y1) - x2 = min(im_w - 1, x2) - y2 = min(im_h - 1, y2) - gt_bbox[i] = [x1, y1, x2, y2] - is_crowd[i][0] = 0 - difficult[i][0] = _difficult - voc_rec = { - 'im_file': img_file, - 'im_id': im_id, - 'h': im_h, - 'w': im_w, - 'is_crowd': is_crowd, - 'gt_class': gt_class, - 'gt_score': gt_score, - 'gt_bbox': gt_bbox, - 'gt_poly': [], - 'difficult': difficult - } - if len(objs) != 0: - records.append(voc_rec) - - ct += 1 - if sample_num > 0 and ct >= sample_num: - break - assert len(records) > 0, 'not found any voc record in %s' % (anno_path) - return [records, cname2cid] - - -def pascalvoc_label(with_background=True): - labels_map = { - 'aeroplane': 1, - 'bicycle': 2, - 'bird': 3, - 'boat': 4, - 'bottle': 5, - 'bus': 6, - 'car': 7, - 'cat': 8, - 'chair': 9, - 'cow': 10, - 'diningtable': 11, - 'dog': 12, - 'horse': 13, - 'motorbike': 14, - 'person': 15, - 'pottedplant': 16, - 'sheep': 17, - 'sofa': 18, - 'train': 19, - 'tvmonitor': 20 - } - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} - return labels_map diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/widerface_loader.py b/PaddleCV/PaddleDetection/ppdet/data/source/widerface_loader.py deleted file mode 100644 index 97ed476004e8c7a6ef67dfe155487efd2277f15c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/source/widerface_loader.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -import logging -logger = logging.getLogger(__name__) - - -def load(anno_path, - sample_num=-1, - cname2cid=None, - with_background=True): - """ - Load WiderFace records with 'anno_path' - - Args: - anno_path (str): root directory for voc annotation data - sample_num (int): number of samples to load, -1 means all - with_background (bool): whether load background as a class. - if True, total class number will - be 2. default True - - Returns: - (records, catname2clsid) - 'records' is list of dict whose structure is: - { - 'im_file': im_fname, # image file name - 'im_id': im_id, # image id - 'gt_class': gt_class, - 'gt_bbox': gt_bbox, - } - 'cname2id' is a dict to map category name to class id - """ - - txt_file = anno_path - - records = [] - ct = 0 - file_lists = _load_file_list(txt_file) - cname2cid = widerface_label(with_background) - - for item in file_lists: - im_fname = item[0] - im_id = np.array([ct]) - gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32) - gt_class = np.ones((len(item) - 2, 1), dtype=np.int32) - for index_box in range(len(item)): - if index_box >= 2: - temp_info_box = item[index_box].split(' ') - xmin = float(temp_info_box[0]) - ymin = float(temp_info_box[1]) - w = float(temp_info_box[2]) - h = float(temp_info_box[3]) - # Filter out wrong labels - if w < 0 or h < 0: - continue - xmin = max(0, xmin) - ymin = max(0, ymin) - xmax = xmin + w - ymax = ymin + h - gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax] - - widerface_rec = { - 'im_file': im_fname, - 'im_id': im_id, - 'gt_bbox': gt_bbox, - 
'gt_class': gt_class, - } - # logger.debug - if len(item) != 0: - records.append(widerface_rec) - - ct += 1 - if sample_num > 0 and ct >= sample_num: - break - assert len(records) > 0, 'not found any widerface in %s' % (anno_path) - logger.info('{} samples in file {}'.format(ct, anno_path)) - return records, cname2cid - - -def _load_file_list(input_txt): - with open(input_txt, 'r') as f_dir: - lines_input_txt = f_dir.readlines() - - file_dict = {} - num_class = 0 - for i in range(len(lines_input_txt)): - line_txt = lines_input_txt[i].strip('\n\t\r') - if '.jpg' in line_txt: - if i != 0: - num_class += 1 - file_dict[num_class] = [] - file_dict[num_class].append(line_txt) - if '.jpg' not in line_txt: - if len(line_txt) > 6: - split_str = line_txt.split(' ') - x1_min = float(split_str[0]) - y1_min = float(split_str[1]) - x2_max = float(split_str[2]) - y2_max = float(split_str[3]) - line_txt = str(x1_min) + ' ' + str(y1_min) + ' ' + str( - x2_max) + ' ' + str(y2_max) - file_dict[num_class].append(line_txt) - else: - file_dict[num_class].append(line_txt) - - return list(file_dict.values()) - - -def widerface_label(with_background=True): - labels_map = { - 'face': 1 - } - if not with_background: - labels_map = {k: v - 1 for k, v in labels_map.items()} - return labels_map diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/000012.jpg b/PaddleCV/PaddleDetection/ppdet/data/tests/000012.jpg deleted file mode 100644 index b829107b842f6f15706744fdcbea05ec7341b311..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/ppdet/data/tests/000012.jpg and /dev/null differ diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/coco.yml b/PaddleCV/PaddleDetection/ppdet/data/tests/coco.yml deleted file mode 100644 index 80ae7ed9e424601082e233e7158d9c0a28f1727b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/coco.yml +++ /dev/null @@ -1,48 +0,0 @@ -DATA: - TRAIN: - ANNO_FILE: data/coco.test/train2017.roidb - 
IMAGE_DIR: data/coco.test/train2017 - SAMPLES: 10 - TYPE: RoiDbSource - VAL: - ANNO_FILE: data/coco.test/val2017.roidb - IMAGE_DIR: data/coco.test/val2017 - SAMPLES: 10 - TYPE: RoiDbSource -TRANSFORM: - TRAIN: - OPS: - - OP: DecodeImage - TO_RGB: False - - OP: RandomFlipImage - PROB: 0.5 - - OP: NormalizeImage - MEAN: [102.9801, 115.9465, 122.7717] - IS_SCALE: False - IS_CHANNEL_FIRST: False - - OP: ResizeImage - TARGET_SIZE: 800 - MAX_SIZE: 1333 - - OP: Permute - TO_BGR: False - - OP: ArrangeRCNN - BATCH_SIZE: 1 - IS_PADDING: True - DROP_LAST: False - WORKER_CONF: - BUFSIZE: 100 - WORKER_NUM: 4 - USE_PROCESS: True - MEMSIZE: 2G - VAL: - OPS: - - OP: DecodeImage - TO_RGB: True - - OP: ResizeImage - TARGET_SIZE: 224 - - OP: ArrangeSSD - BATCH_SIZE: 1 - WORKER_CONF: - BUFSIZE: 100 - WORKER_NUM: 4 - USE_PROCESS: True diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/data/prepare_data.sh b/PaddleCV/PaddleDetection/ppdet/data/tests/data/prepare_data.sh deleted file mode 100755 index a81abc5d80b16e795d8020d00a27ef7c851f0185..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/data/prepare_data.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -#function: -# prepare coco data for testing - -root=$(dirname `readlink -f ${BASH_SOURCE}[0]`) -cwd=`pwd` - -if [[ $cwd != $root ]];then - pushd $root 2>&1 1>/dev/null -fi - -test_coco_python2_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python2.zip/20190603095315/download" -test_coco_python3_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python3.zip/20190603095447/download" - -if [[ $1 = "python2" ]];then - test_coco_data_url=${test_coco_python2_url} - coco_zip_file="coco.test.python2.zip" -else - test_coco_data_url=${test_coco_python3_url} - coco_zip_file="coco.test.python3.zip" -fi -echo "download testing coco from url[${test_coco_data_url}]" -coco_root_dir=${coco_zip_file/.zip/} - -# clear already exist file or directory -rm -rf 
${coco_root_dir} ${coco_zip_file} - -wget ${test_coco_data_url} -O ${coco_zip_file} -if [ -e $coco_zip_file ];then - echo "succeed to download ${coco_zip_file}, so unzip it" - unzip ${coco_zip_file} >/dev/null 2>&1 -fi - -if [ -e ${coco_root_dir} ];then - rm -rf coco.test - ln -s ${coco_root_dir} coco.test - echo "succeed to generate coco data in[${coco_root_dir}] for testing" - exit 0 -else - echo "failed to generate coco data" - exit 1 -fi diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/rcnn_dataset.yml b/PaddleCV/PaddleDetection/ppdet/data/tests/rcnn_dataset.yml deleted file mode 100644 index b57fd55e94df68a4a9ddbbaa60ca20226377c451..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/rcnn_dataset.yml +++ /dev/null @@ -1,32 +0,0 @@ -DATA: - TRAIN: - ANNO_FILE: data/coco.test/train2017.roidb - IMAGE_DIR: data/coco.test/train2017 - SAMPLES: 10 - IS_SHUFFLE: True - TYPE: RoiDbSource -TRANSFORM: - TRAIN: - OPS: - - OP: DecodeImage - TO_RGB: False - - OP: RandomFlipImage - PROB: 0.5 - - OP: NormalizeImage - MEAN: [102.9801, 115.9465, 122.7717] - IS_SCALE: False - IS_CHANNEL_FIRST: False - - OP: ResizeImage - TARGET_SIZE: 800 - MAX_SIZE: 1333 - - OP: Permute - TO_BGR: False - - OP: ArrangeRCNN - BATCH_SIZE: 1 - IS_PADDING: True - DROP_LAST: False - WORKER_CONF: - BUFSIZE: 100 - WORKER_NUM: 4 - MEMSIZE: 2G - USE_PROCESS: True diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/run_all_tests.py b/PaddleCV/PaddleDetection/ppdet/data/tests/run_all_tests.py deleted file mode 100644 index a1882d5dd38a4da14335244e7603af47568b7bbf..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/run_all_tests.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#!/usr/bin/python -#-*-coding:utf-8-*- -"""Run all tests -""" - -import unittest -import test_loader -import test_operator -import test_roidb_source -import test_iterator_source -import test_transformer -import test_reader - -if __name__ == '__main__': - alltests = unittest.TestSuite([ - unittest.TestLoader().loadTestsFromTestCase(t) \ - for t in [ - test_loader.TestLoader, - test_operator.TestBase, - test_roidb_source.TestRoiDbSource, - test_iterator_source.TestIteratorSource, - test_transformer.TestTransformer, - test_reader.TestReader, - ] - ]) - - was_succ = unittest\ - .TextTestRunner(verbosity=2)\ - .run(alltests)\ - .wasSuccessful() - - exit(0 if was_succ else 1) diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/set_env.py b/PaddleCV/PaddleDetection/ppdet/data/tests/set_env.py deleted file mode 100644 index bc46ac0f10e3b1690cf59bd6803eedd0bee6d9d4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/set_env.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import os -import six -import logging - -import matplotlib -matplotlib.use('Agg', force=False) - -prefix = os.path.dirname(os.path.abspath(__file__)) - -#coco data for testing -if six.PY3: - version = 'python3' -else: - version = 'python2' - -data_root = os.path.join(prefix, 'data/coco.test.%s' % (version)) - -# coco data for testing -coco_data = { - 'TRAIN': { - 'ANNO_FILE': os.path.join(data_root, 'train2017.roidb'), - 'IMAGE_DIR': os.path.join(data_root, 'train2017') - }, - 'VAL': { - 'ANNO_FILE': os.path.join(data_root, 'val2017.roidb'), - 'IMAGE_DIR': os.path.join(data_root, 'val2017') - } -} - -script = os.path.join(os.path.dirname(__file__), 'data/prepare_data.sh') - -if not os.path.exists(data_root): - ret = os.system('bash %s %s' % (script, version)) - if ret != 0: - logging.error('not found file[%s], you should manually prepare ' - 'your data using "data/prepare_data.sh"' % (data_root)) - sys.exit(1) diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/test_iterator_source.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_iterator_source.py deleted file mode 100644 index c32a734fb258dce2205d24f1dc60479fadaf57a6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_iterator_source.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import unittest -import sys -import logging - -import set_env -from ppdet.data.source import IteratorSource - - -def _generate_iter_maker(num=10): - def _reader(): - for i in range(num): - yield {'image': 'image_' + str(i), 'label': i} - - return _reader - -class TestIteratorSource(unittest.TestCase): - """Test cases for dataset.source.roidb_source - """ - - @classmethod - def setUpClass(cls): - """ setup - """ - pass - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_basic(self): - """ test basic apis 'next/size/drained' - """ - iter_maker = _generate_iter_maker() - iter_source = IteratorSource(iter_maker) - for i, sample in enumerate(iter_source): - self.assertTrue('image' in sample) - self.assertGreater(len(sample['image']), 0) - self.assertTrue(iter_source.drained()) - self.assertEqual(i + 1, iter_source.size()) - - def test_reset(self): - """ test functions 'reset/epoch_id' - """ - iter_maker = _generate_iter_maker() - iter_source = IteratorSource(iter_maker) - - self.assertTrue(iter_source.next() is not None) - self.assertEqual(iter_source.epoch_id(), 0) - - iter_source.reset() - - self.assertEqual(iter_source.epoch_id(), 1) - self.assertTrue(iter_source.next() is not None) - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/test_loader.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_loader.py deleted file mode 100644 index dc835aa0f12ecb82e267c57f7a9f875a9adc48ce..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_loader.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import os -import time -import unittest -import sys -import logging -import numpy as np - -import set_env - - -class TestLoader(unittest.TestCase): - """Test cases for dataset.source.loader - """ - - @classmethod - def setUpClass(cls): - """ setup - """ - cls.prefix = os.path.dirname(os.path.abspath(__file__)) - # json data - cls.anno_path = os.path.join(cls.prefix, - 'data/coco/instances_val2017.json') - cls.image_dir = os.path.join(cls.prefix, 'data/coco/val2017') - cls.anno_path1 = os.path.join(cls.prefix, - "data/voc/ImageSets/Main/train.txt") - cls.image_dir1 = os.path.join(cls.prefix, "data/voc/JPEGImages") - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_load_coco_in_json(self): - """ test loading COCO data in json file - """ - from ppdet.data.source.coco_loader import load - if not os.path.exists(self.anno_path): - logging.warn('not found %s, so skip this test' % (self.anno_path)) - return - samples = 10 - records, cname2id = load(self.anno_path, samples) - self.assertEqual(len(records), samples) - self.assertGreater(len(cname2id), 0) - - def test_load_coco_in_roidb(self): - """ test loading COCO data in pickled records - """ - anno_path = os.path.join(self.prefix, - 'data/roidbs/instances_val2017.roidb') - - if not os.path.exists(anno_path): - logging.warn('not found %s, so skip this test' % (anno_path)) - return - - samples = 10 - from 
ppdet.data.source.loader import load_roidb - records, cname2cid = load_roidb(anno_path, samples) - self.assertEqual(len(records), samples) - self.assertGreater(len(cname2cid), 0) - - def test_load_voc_in_xml(self): - """ test loading VOC data in xml files - """ - from ppdet.data.source.voc_loader import load - if not os.path.exists(self.anno_path1): - logging.warn('not found %s, so skip this test' % (self.anno_path1)) - return - samples = 3 - records, cname2cid = load(self.anno_path1, samples) - self.assertEqual(len(records), samples) - self.assertGreater(len(cname2cid), 0) - - def test_load_voc_in_roidb(self): - """ test loading VOC data in pickled records - """ - anno_path = os.path.join(self.prefix, 'data/roidbs/train.roidb') - - if not os.path.exists(anno_path): - logging.warn('not found %s, so skip this test' % (anno_path)) - return - - samples = 3 - from ppdet.data.source.loader import load_roidb - records, cname2cid = load_roidb(anno_path, samples) - self.assertEqual(len(records), samples) - self.assertGreater(len(cname2cid), 0) - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/test_operator.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_operator.py deleted file mode 100644 index 85d5b229d48fab968f38639d5f7533e5646f25c3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_operator.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import os -import unittest -import logging -import numpy as np -import set_env -import ppdet.data.transform as tf -logging.basicConfig(level=logging.INFO) - - -class TestBase(unittest.TestCase): - """Test cases for dataset.transform.operator - """ - - @classmethod - def setUpClass(cls, with_mixup=False): - """ setup - """ - roidb_fname = set_env.coco_data['TRAIN']['ANNO_FILE'] - image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR'] - import pickle as pkl - with open(roidb_fname, 'rb') as f: - roidb = f.read() - roidb = pkl.loads(roidb) - fn = os.path.join(image_dir, roidb[0][0]['im_file']) - with open(fn, 'rb') as f: - roidb[0][0]['image'] = f.read() - if with_mixup: - mixup_fn = os.path.join(image_dir, roidb[0][1]['im_file']) - roidb[0][0]['mixup'] = roidb[0][1] - with open(fn, 'rb') as f: - roidb[0][0]['mixup']['image'] = f.read() - cls.sample = roidb[0][0] - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_ops_all(self): - """ test operators - """ - # ResizeImage - ops_conf = [{ - 'op': 'DecodeImage' - }, { - 'op': 'ResizeImage', - 'target_size': 300, - 'max_size': 1333 - }] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - data = self.sample.copy() - result0 = mapper(data) - self.assertIsNotNone(result0['image']) - self.assertEqual(len(result0['image'].shape), 3) - # RandFlipImage - ops_conf = [{'op': 'RandomFlipImage'}] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - result1 = mapper(result0) - self.assertEqual(result1['image'].shape, result0['image'].shape) - self.assertEqual(result1['gt_bbox'].shape, result0['gt_bbox'].shape) - # NormalizeImage - ops_conf = [{'op': 'NormalizeImage', 'is_channel_first': False}] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - result2 = mapper(result1) - im1 = result1['image'] - count = np.where(im1 <= 
1)[0] - if im1.dtype == 'float64': - self.assertEqual(count, im1.shape[0] * im1.shape[1], im1.shape[2]) - # ArrangeSample - ops_conf = [{'op': 'ArrangeRCNN'}] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - result3 = mapper(result2) - self.assertEqual(type(result3), tuple) - - def test_ops_part1(self): - """test Crop and Resize - """ - ops_conf = [{ - 'op': 'DecodeImage' - }, { - 'op': 'NormalizeBox' - }, { - 'op': 'CropImage', - 'batch_sampler': [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]] - }] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - data = self.sample.copy() - result = mapper(data) - self.assertEqual(len(result['image'].shape), 3) - - def test_ops_part2(self): - """test Expand and RandomDistort - """ - ops_conf = [{ - 'op': 'DecodeImage' - }, { - 'op': 'NormalizeBox' - }, { - 'op': 'ExpandImage', - 'max_ratio': 1.5, - 'prob': 1 - }] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - data = self.sample.copy() - result = mapper(data) - self.assertEqual(len(result['image'].shape), 3) - self.assertGreater(result['gt_bbox'].shape[0], 0) - - def test_ops_part3(self): - """test Mixup and RandomInterp - """ - ops_conf = [{ - 'op': 'DecodeImage', - 'with_mixup': True, - }, { - 'op': 'MixupImage', - }, { - 'op': 'RandomInterpImage', - 'target_size': 608 - }] - mapper = tf.build_mapper(ops_conf) - self.assertTrue(mapper is not None) - data = self.sample.copy() - result = mapper(data) - self.assertEqual(len(result['image'].shape), 3) - self.assertGreater(result['gt_bbox'].shape[0], 0) - #self.assertGreater(result['gt_score'].shape[0], 0) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/PaddleCV/PaddleDetection/ppdet/data/tests/test_reader.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_reader.py deleted file mode 100644 index e94484dff077d9b12a82f7337cd9cb165e8bd23f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_reader.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import unittest -import sys -import logging -import numpy as np -import yaml - -import set_env -from ppdet.data.reader import Reader -from ppdet.data.source import build_source -from ppdet.data.source import IteratorSource - - -class TestReader(unittest.TestCase): - """Test cases for dataset.reader - """ - - @classmethod - def setUpClass(cls): - """ setup - """ - prefix = os.path.dirname(os.path.abspath(__file__)) - coco_yml = os.path.join(prefix, 'coco.yml') - with open(coco_yml, 'rb') as f: - cls.coco_conf = yaml.load(f.read()) - - cls.coco_conf['DATA']['TRAIN'] = set_env.coco_data['TRAIN'] - cls.coco_conf['DATA']['VAL'] = set_env.coco_data['VAL'] - - rcnn_yml = os.path.join(prefix, 'rcnn_dataset.yml') - - with open(rcnn_yml, 'rb') as f: - cls.rcnn_conf = yaml.load(f.read()) - - cls.rcnn_conf['DATA']['TRAIN'] = set_env.coco_data['TRAIN'] - cls.rcnn_conf['DATA']['VAL'] = set_env.coco_data['VAL'] - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_train(self): - """ Test reader 
for training - """ - coco = Reader( - self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], maxiter=1000) - train_rd = coco.train() - self.assertTrue(train_rd is not None) - - ct = 0 - total = 0 - bytes = 0 - prev_ts = None - for sample in train_rd(): - if prev_ts is None: - start_ts = time.time() - prev_ts = start_ts - - ct += 1 - bytes += 4 * sample[0][0].size * len(sample[0]) - self.assertTrue(sample is not None) - cost = time.time() - prev_ts - if cost >= 1.0: - total += ct - qps = total / (time.time() - start_ts) - bps = bytes / (time.time() - start_ts) - - logging.info('got %d/%d samples in %.3fsec with qps:%d bps:%d' % - (ct, total, cost, qps, bps)) - bytes = 0 - ct = 0 - prev_ts = time.time() - - total += ct - self.assertEqual(total, coco._maxiter) - - def test_val(self): - """ Test reader for validation - """ - coco = Reader(self.coco_conf['DATA'], self.coco_conf['TRANSFORM'], 10) - val_rd = coco.val() - self.assertTrue(val_rd is not None) - - # test 3 epoches - for _ in range(3): - ct = 0 - for sample in val_rd(): - ct += 1 - self.assertTrue(sample is not None) - self.assertGreaterEqual(ct, coco._maxiter) - - def test_rcnn_train(self): - """ Test reader for training - """ - anno = self.rcnn_conf['DATA']['TRAIN']['ANNO_FILE'] - if not os.path.exists(anno): - logging.error('exit test_rcnn for not found file[%s]' % (anno)) - return - - rcnn = Reader(self.rcnn_conf['DATA'], self.rcnn_conf['TRANSFORM'], 10) - rcnn_rd = rcnn.train() - self.assertTrue(rcnn_rd is not None) - - ct = 0 - out = None - for sample in rcnn_rd(): - out = sample - ct += 1 - self.assertTrue(sample is not None) - self.assertEqual(out[0][0].shape[0], 3) - self.assertEqual(out[0][1].shape[0], 3) - self.assertEqual(out[0][3].shape[1], 4) - self.assertEqual(out[0][4].shape[1], 1) - self.assertEqual(out[0][5].shape[1], 1) - self.assertGreaterEqual(ct, rcnn._maxiter) - - def test_create(self): - """ Test create a reader using my source - """ - def _my_data_reader(): - mydata = 
build_source(self.rcnn_conf['DATA']['TRAIN']) - for i, sample in enumerate(mydata): - yield sample - - my_source = IteratorSource(_my_data_reader) - mode = 'TRAIN' - train_rd = Reader.create(mode, - self.rcnn_conf['DATA'][mode], - self.rcnn_conf['TRANSFORM'][mode], - max_iter=10, my_source=my_source) - - out = None - for sample in train_rd(): - out = sample - self.assertTrue(sample is not None) - self.assertEqual(out[0][0].shape[0], 3) - self.assertEqual(out[0][1].shape[0], 3) - self.assertEqual(out[0][3].shape[1], 4) - self.assertEqual(out[0][4].shape[1], 1) - self.assertEqual(out[0][5].shape[1], 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/test_roidb_source.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_roidb_source.py deleted file mode 100644 index 105cc9cd7d9d971bf2a3f69cccf251ce2943275a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_roidb_source.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import time -import unittest -import sys -import logging - -import set_env -from ppdet.data.source import build_source - - -class TestRoiDbSource(unittest.TestCase): - """Test cases for dataset.source.roidb_source - """ - - @classmethod - def setUpClass(cls): - """ setup - """ - anno_path = set_env.coco_data['TRAIN']['ANNO_FILE'] - image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR'] - cls.config = { - 'data_cf': { - 'anno_file': anno_path, - 'image_dir': image_dir, - 'samples': 100, - 'load_img': True - }, - 'cname2cid': None - } - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_basic(self): - """ test basic apis 'next/size/drained' - """ - roi_source = build_source(self.config) - for i, sample in enumerate(roi_source): - self.assertTrue('image' in sample) - self.assertGreater(len(sample['image']), 0) - self.assertTrue(roi_source.drained()) - self.assertEqual(i + 1, roi_source.size()) - - def test_reset(self): - """ test functions 'reset/epoch_id' - """ - roi_source = build_source(self.config) - - self.assertTrue(roi_source.next() is not None) - self.assertEqual(roi_source.epoch_id(), 0) - - roi_source.reset() - - self.assertEqual(roi_source.epoch_id(), 1) - self.assertTrue(roi_source.next() is not None) - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/tests/test_transformer.py b/PaddleCV/PaddleDetection/ppdet/data/tests/test_transformer.py deleted file mode 100644 index 9123669293e6d0f432d1e90aa32e933d7fb6851c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tests/test_transformer.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import unittest -import sys -import logging -import numpy as np - -import set_env -import ppdet.data.transform as tf -from ppdet.data.source import build_source - -logger = logging.getLogger(__name__) - -logging.basicConfig(level=logging.INFO) - - -class TestTransformer(unittest.TestCase): - """Test cases for dataset.transform.transformer - """ - - @classmethod - def setUpClass(cls): - """ setup - """ - - prefix = os.path.dirname(os.path.abspath(__file__)) - # json data - anno_path = set_env.coco_data['TRAIN']['ANNO_FILE'] - image_dir = set_env.coco_data['TRAIN']['IMAGE_DIR'] - cls.sc_config = { - 'anno_file': anno_path, - 'image_dir': image_dir, - 'samples': 200 - } - - cls.ops = [{ - 'op': 'DecodeImage', - 'to_rgb': True - }, { - 'op': 'ResizeImage', - 'target_size': 800, - 'max_size': 1333 - }, { - 'op': 'ArrangeRCNN', - 'is_mask': False - }] - - @classmethod - def tearDownClass(cls): - """ tearDownClass """ - pass - - def test_map(self): - """ test transformer.map - """ - mapper = tf.build_mapper(self.ops) - ds = build_source(self.sc_config) - mapped_ds = tf.map(ds, mapper) - ct = 0 - for sample in mapped_ds: - self.assertTrue(type(sample[0]) is np.ndarray) - ct += 1 - - self.assertEqual(ct, mapped_ds.size()) - - def test_parallel_map(self): - """ test transformer.map with concurrent workers - """ - mapper = tf.build_mapper(self.ops) - ds = build_source(self.sc_config) - worker_conf = {'WORKER_NUM': 2, 'use_process': True} - mapped_ds = tf.map(ds, mapper, worker_conf) - - ct = 0 - for sample in mapped_ds: - 
self.assertTrue(type(sample[0]) is np.ndarray) - ct += 1 - - self.assertTrue(mapped_ds.drained()) - self.assertEqual(ct, mapped_ds.size()) - mapped_ds.reset() - - ct = 0 - for sample in mapped_ds: - self.assertTrue(type(sample[0]) is np.ndarray) - ct += 1 - - self.assertEqual(ct, mapped_ds.size()) - - def test_batch(self): - """ test batched dataset - """ - batchsize = 2 - mapper = tf.build_mapper(self.ops) - ds = build_source(self.sc_config) - mapped_ds = tf.map(ds, mapper) - batched_ds = tf.batch(mapped_ds, batchsize, True) - for sample in batched_ds: - out = sample - self.assertEqual(len(out), batchsize) - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/tools/generate_data_for_training.py b/PaddleCV/PaddleDetection/ppdet/data/tools/generate_data_for_training.py deleted file mode 100644 index 30b196f61418f28edbdb80c95a1d9adbdf00c11b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tools/generate_data_for_training.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# tool used convert COCO or VOC data to a pickled file whose -# schema for each sample is the same. -# -# notes: -# Original data format of COCO or VOC can also be directly -# used by 'PPdetection' to train. -# This tool just convert data to a unified schema, -# and it's useful when debuging with small dataset. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import argparse - -import os -import sys -import logging -import pickle as pkl - -path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../') -if path not in sys.path: - sys.path.insert(0, path) - -from data.source import loader - - -def parse_args(): - """ parse arguments - """ - parser = argparse.ArgumentParser( - description='Generate Standard Dataset for PPdetection') - - parser.add_argument( - '--type', - type=str, - default='json', - help='file format of label file, eg: json for COCO and xml for VOC') - parser.add_argument( - '--annotation', - type=str, - help='label file name for COCO or VOC dataset, ' - 'eg: instances_val2017.json or train.txt') - parser.add_argument( - '--save-dir', - type=str, - default='roidb', - help='directory to save roidb file which contains pickled samples') - parser.add_argument( - '--samples', - type=int, - default=-1, - help='number of samples to dump, default to all') - - args = parser.parse_args() - return args - - -def dump_coco_as_pickle(args): - """ Load COCO data, and then save it as pickled file. - - Notes: - label file of COCO contains a json which consists - of label info for each sample - """ - samples = args.samples - save_dir = args.save_dir - if not os.path.exists(save_dir): - os.makedirs(save_dir) - anno_path = args.annotation - roidb, cat2id = loader.load(anno_path, samples, with_cat2id=True) - samples = len(roidb) - dsname = os.path.basename(anno_path).rstrip('.json') - roidb_fname = save_dir + "/%s.roidb" % (dsname) - with open(roidb_fname, "wb") as fout: - pkl.dump((roidb, cat2id), fout) - - #for rec in roidb: - # sys.stderr.write('%s\n' % (rec['im_file'])) - logging.info('dumped %d samples to file[%s]' % (samples, roidb_fname)) - - -def dump_voc_as_pickle(args): - """ Load VOC data, and then save it as pickled file. 
- - Notes: - we assume label file of VOC contains lines - each of which corresponds to a xml file - that contains it's label info - """ - samples = args.samples - save_dir = args.save_dir - if not os.path.exists(save_dir): - os.makedirs(save_dir) - save_dir = args.save_dir - anno_path = os.path.expanduser(args.annotation) - roidb, cat2id = loader.load( - anno_path, samples, with_cat2id=True, use_default_label=None) - samples = len(roidb) - part = anno_path.split('/') - dsname = part[-4] - roidb_fname = save_dir + "/%s.roidb" % (dsname) - with open(roidb_fname, "wb") as fout: - pkl.dump((roidb, cat2id), fout) - anno_path = os.path.join(anno_path.split('/train.txt')[0], 'label_list.txt') - with open(anno_path, 'w') as fw: - for key in cat2id.keys(): - fw.write(key + '\n') - logging.info('dumped %d samples to file[%s]' % (samples, roidb_fname)) - - -if __name__ == "__main__": - """ Make sure you have already downloaded original COCO or VOC data, - then you can convert it using this tool. - - Usage: - python generate_data_for_training.py --type=json - --annotation=./annotations/instances_val2017.json - --save-dir=./roidb --samples=100 - """ - args = parse_args() - - # VOC data are organized in xml files - if args.type == 'xml': - dump_voc_as_pickle(args) - # COCO data are organized in json file - elif args.type == 'json': - dump_coco_as_pickle(args) - else: - TypeError('Can\'t deal with {} type. '\ - 'Only xml or json file format supported'.format(args.type)) diff --git a/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py b/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py deleted file mode 100644 index 0379fab6335cb7886da8fe9f5170717a4453c6d6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import glob -import json -import os -import os.path as osp -import sys -import shutil - -import numpy as np -import PIL.ImageDraw - - -class MyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - else: - return super(MyEncoder, self).default(obj) - - -def getbbox(self, points): - polygons = points - mask = self.polygons_to_mask([self.height, self.width], polygons) - return self.mask2box(mask) - - -def images_labelme(data, num): - image = {} - image['height'] = data['imageHeight'] - image['width'] = data['imageWidth'] - image['id'] = num + 1 - image['file_name'] = data['imagePath'].split('/')[-1] - return image - -def images_cityscape(data, num, img_file): - image = {} - image['height'] = data['imgHeight'] - image['width'] = data['imgWidth'] - image['id'] = num + 1 - image['file_name'] = img_file - return image - - -def categories(label, labels_list): - category = {} - category['supercategory'] = 'component' - category['id'] = len(labels_list) + 1 - category['name'] = label - return category - - -def annotations_rectangle(points, label, image_num, object_num, label_to_num): - annotation = {} - seg_points = np.asarray(points).copy() - seg_points[1, :] = np.asarray(points)[2, :] - seg_points[2, :] = np.asarray(points)[1, :] - 
annotation['segmentation'] = [list(seg_points.flatten())] - annotation['iscrowd'] = 0 - annotation['image_id'] = image_num + 1 - annotation['bbox'] = list( - map(float, [ - points[0][0], points[0][1], points[1][0] - points[0][0], points[1][ - 1] - points[0][1] - ])) - annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3] - annotation['category_id'] = label_to_num[label] - annotation['id'] = object_num + 1 - return annotation - - -def annotations_polygon(height, width, points, label, image_num, object_num, label_to_num): - annotation = {} - annotation['segmentation'] = [list(np.asarray(points).flatten())] - annotation['iscrowd'] = 0 - annotation['image_id'] = image_num + 1 - annotation['bbox'] = list(map(float, get_bbox(height, width, points))) - annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3] - annotation['category_id'] = label_to_num[label] - annotation['id'] = object_num + 1 - return annotation - - -def get_bbox(height, width, points): - polygons = points - mask = np.zeros([height, width], dtype=np.uint8) - mask = PIL.Image.fromarray(mask) - xy = list(map(tuple, polygons)) - PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1) - mask = np.array(mask, dtype=bool) - index = np.argwhere(mask == 1) - rows = index[:, 0] - clos = index[:, 1] - left_top_r = np.min(rows) - left_top_c = np.min(clos) - right_bottom_r = np.max(rows) - right_bottom_c = np.max(clos) - return [ - left_top_c, left_top_r, right_bottom_c - left_top_c, - right_bottom_r - left_top_r - ] - - -def deal_json(ds_type, img_path, json_path): - data_coco = {} - label_to_num = {} - images_list = [] - categories_list = [] - annotations_list = [] - labels_list = [] - image_num = -1 - object_num = -1 - for img_file in os.listdir(img_path): - img_label = img_file.split('.')[0] - if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']: - continue - label_file = osp.join(json_path, img_label + '.json') - print('Generating dataset from:', 
label_file) - image_num = image_num + 1 - with open(label_file) as f: - data = json.load(f) - if ds_type == 'labelme': - images_list.append(images_labelme(data, image_num)) - elif ds_type == 'cityscape': - images_list.append(images_cityscape(data, image_num, img_file)) - if ds_type == 'labelme': - for shapes in data['shapes']: - object_num = object_num + 1 - label = shapes['label'] - if label not in labels_list: - categories_list.append(categories(label, labels_list)) - labels_list.append(label) - label_to_num[label] = len(labels_list) - points = shapes['points'] - p_type = shapes['shape_type'] - if p_type == 'polygon': - annotations_list.append( - annotations_polygon(data['imageHeight'], data[ - 'imageWidth'], points, label, image_num, object_num, label_to_num)) - - if p_type == 'rectangle': - points.append([points[0][0], points[1][1]]) - points.append([points[1][0], points[0][1]]) - annotations_list.append( - annotations_rectangle(points, label, image_num, object_num, label_to_num)) - elif ds_type == 'cityscape': - for shapes in data['objects']: - object_num = object_num + 1 - label = shapes['label'] - if label not in labels_list: - categories_list.append(categories(label, labels_list)) - labels_list.append(label) - label_to_num[label] = len(labels_list) - points = shapes['polygon'] - annotations_list.append( - annotations_polygon(data['imgHeight'], data[ - 'imgWidth'], points, label, image_num, object_num, label_to_num)) - data_coco['images'] = images_list - data_coco['categories'] = categories_list - data_coco['annotations'] = annotations_list - return data_coco - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--dataset_type', help='the type of dataset') - parser.add_argument('--json_input_dir', help='input annotated directory') - parser.add_argument('--image_input_dir', help='image directory') - parser.add_argument( - '--output_dir', help='output dataset directory', 
default='../../../') - parser.add_argument( - '--train_proportion', - help='the proportion of train dataset', - type=float, - default=1.0) - parser.add_argument( - '--val_proportion', - help='the proportion of validation dataset', - type=float, - default=0.0) - parser.add_argument( - '--test_proportion', - help='the proportion of test dataset', - type=float, - default=0.0) - args = parser.parse_args() - try: - assert args.dataset_type in ['labelme', 'cityscape'] - except AssertionError as e: - print('Now only support the cityscape dataset and labelme dataset!!') - os._exit(0) - try: - assert os.path.exists(args.json_input_dir) - except AssertionError as e: - print('The json folder does not exist!') - os._exit(0) - try: - assert os.path.exists(args.image_input_dir) - except AssertionError as e: - print('The image folder does not exist!') - os._exit(0) - try: - assert args.train_proportion + args.val_proportion + args.test_proportion == 1.0 - except AssertionError as e: - print( - 'The sum of pqoportion of training, validation and test datase must be 1!' - ) - os._exit(0) - - # Allocate the dataset. 
- total_num = len(glob.glob(osp.join(args.json_input_dir, '*.json'))) - if args.train_proportion != 0: - train_num = int(total_num * args.train_proportion) - os.makedirs(args.output_dir + '/train') - else: - train_num = 0 - if args.val_proportion == 0.0: - val_num = 0 - test_num = total_num - train_num - if args.test_proportion != 0.0: - os.makedirs(args.output_dir + '/test') - else: - val_num = int(total_num * args.val_proportion) - test_num = total_num - train_num - val_num - os.makedirs(args.output_dir + '/val') - if args.test_proportion != 0.0: - os.makedirs(args.output_dir + '/test') - count = 1 - for img_name in os.listdir(args.image_input_dir): - if count <= train_num: - shutil.copyfile( - osp.join(args.image_input_dir, img_name), - osp.join(args.output_dir + '/train/', img_name)) - else: - if count <= train_num + val_num: - shutil.copyfile( - osp.join(args.image_input_dir, img_name), - osp.join(args.output_dir + '/val/', img_name)) - else: - shutil.copyfile( - osp.join(args.image_input_dir, img_name), - osp.join(args.output_dir + '/test/', img_name)) - count = count + 1 - - # Deal with the json files. 
- if not os.path.exists(args.output_dir + '/annotations'): - os.makedirs(args.output_dir + '/annotations') - if args.train_proportion != 0: - train_data_coco = deal_json(args.dataset_type, - args.output_dir + '/train', - args.json_input_dir) - train_json_path = osp.join(args.output_dir + '/annotations', - 'instance_train.json') - json.dump( - train_data_coco, - open(train_json_path, 'w'), - indent=4, - cls=MyEncoder) - if args.val_proportion != 0: - val_data_coco = deal_json(args.dataset_type, - args.output_dir + '/val', - args.json_input_dir) - val_json_path = osp.join(args.output_dir + '/annotations', - 'instance_val.json') - json.dump( - val_data_coco, open(val_json_path, 'w'), indent=4, cls=MyEncoder) - if args.test_proportion != 0: - test_data_coco = deal_json(args.dataset_type, - args.output_dir + '/test', - args.json_input_dir) - test_json_path = osp.join(args.output_dir + '/annotations', - 'instance_test.json') - json.dump( - test_data_coco, open(test_json_path, 'w'), indent=4, cls=MyEncoder) - -if __name__ == '__main__': - main() diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/__init__.py b/PaddleCV/PaddleDetection/ppdet/data/transform/__init__.py deleted file mode 100644 index f4d15e9c3354c6c44912c456eb766eae4d7d32c5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/__init__.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import print_function - -import copy -import logging -import traceback - -from .transformer import MappedDataset, BatchedDataset -from .post_map import build_post_map -from .parallel_map import ParallelMappedDataset -from .operators import BaseOperator, registered_ops - -__all__ = ['build_mapper', 'map', 'batch', 'batch_map'] - -logger = logging.getLogger(__name__) - - -def build_mapper(ops, context=None): - """ - Build a mapper for operators in 'ops' - - Args: - ops (list of operator.BaseOperator or list of op dict): - configs for oprators, eg: - [{'name': 'DecodeImage', 'params': {'to_rgb': True}}, {xxx}] - context (dict): a context object for mapper - - Returns: - a mapper function which accept one argument 'sample' and - return the processed result - """ - new_ops = [] - for _dict in ops: - new_dict = {} - for i, j in _dict.items(): - new_dict[i.lower()] = j - new_ops.append(new_dict) - ops = new_ops - op_funcs = [] - op_repr = [] - for op in ops: - if type(op) is dict and 'op' in op: - op_func = getattr(BaseOperator, op['op']) - params = copy.deepcopy(op) - del params['op'] - o = op_func(**params) - elif not isinstance(op, BaseOperator): - op_func = getattr(BaseOperator, op['name']) - params = {} if 'params' not in op else op['params'] - o = op_func(**params) - else: - assert isinstance(op, BaseOperator), \ - "invalid operator when build ops" - o = op - op_funcs.append(o) - op_repr.append('{{{}}}'.format(str(o))) - op_repr = '[{}]'.format(','.join(op_repr)) - - def _mapper(sample): - ctx = {} if context is None else copy.deepcopy(context) - for f in op_funcs: - try: - out = f(sample, ctx) - sample = out - except Exception as e: - stack_info = traceback.format_exc() - logger.warn("fail to map op [{}] with error: {} and stack:\n{}". 
- format(f, e, str(stack_info))) - raise e - - return out - - _mapper.ops = op_repr - return _mapper - - -def map(ds, mapper, worker_args=None): - """ - Apply 'mapper' to 'ds' - - Args: - ds (instance of Dataset): dataset to be mapped - mapper (function): action to be executed for every data sample - worker_args (dict): configs for concurrent mapper - Returns: - a mapped dataset - """ - - if worker_args is not None: - return ParallelMappedDataset(ds, mapper, worker_args) - else: - return MappedDataset(ds, mapper) - - -def batch(ds, batchsize, drop_last=False, drop_empty=True): - """ - Batch data samples to batches - Args: - batchsize (int): number of samples for a batch - drop_last (bool): drop last few samples if not enough for a batch - - Returns: - a batched dataset - """ - - return BatchedDataset( - ds, batchsize, drop_last=drop_last, drop_empty=drop_empty) - - -def batch_map(ds, config): - """ - Post process the batches. - - Args: - ds (instance of Dataset): dataset to be mapped - mapper (function): action to be executed for every batch - Returns: - a batched dataset which is processed - """ - - mapper = build_post_map(**config) - return MappedDataset(ds, mapper) - - -for nm in registered_ops: - op = getattr(BaseOperator, nm) - locals()[nm] = op - -__all__ += registered_ops diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py b/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py deleted file mode 100644 index bebce691d36ddb12141dd7bfdf81030ff8ed2d1f..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/arrange_sample.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# operators to process sample, -# eg: decode/resize/crop image - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import numpy as np -from .operators import BaseOperator, register_op - -logger = logging.getLogger(__name__) - - -@register_op -class ArrangeRCNN(BaseOperator): - """ - Transform dict to tuple format needed for training. - - Args: - is_mask (bool): whether to use include mask data - """ - - def __init__(self, is_mask=False): - super(ArrangeRCNN, self).__init__() - self.is_mask = is_mask - assert isinstance(self.is_mask, bool), "wrong type for is_mask" - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. 
- Returns: - sample: a tuple containing following items - (image, im_info, im_id, gt_bbox, gt_class, is_crowd, gt_masks) - """ - im = sample['image'] - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - keys = list(sample.keys()) - if 'is_crowd' in keys: - is_crowd = sample['is_crowd'] - else: - raise KeyError("The dataset doesn't have 'is_crowd' key.") - if 'im_info' in keys: - im_info = sample['im_info'] - else: - raise KeyError("The dataset doesn't have 'im_info' key.") - im_id = sample['im_id'] - - outs = (im, im_info, im_id, gt_bbox, gt_class, is_crowd) - gt_masks = [] - if self.is_mask and len(sample['gt_poly']) != 0 \ - and 'is_crowd' in keys: - valid = True - segms = sample['gt_poly'] - assert len(segms) == is_crowd.shape[0] - for i in range(len(sample['gt_poly'])): - segm, iscrowd = segms[i], is_crowd[i] - gt_segm = [] - if iscrowd: - gt_segm.append([[0, 0]]) - else: - for poly in segm: - if len(poly) == 0: - valid = False - break - gt_segm.append(np.array(poly).reshape(-1, 2)) - if (not valid) or len(gt_segm) == 0: - break - gt_masks.append(gt_segm) - outs = outs + (gt_masks, ) - return outs - - -@register_op -class ArrangeEvalRCNN(BaseOperator): - """ - Transform dict to the tuple format needed for evaluation. - """ - - def __init__(self): - super(ArrangeEvalRCNN, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. 
- Returns: - sample: a tuple containing the following items: - (image, im_info, im_id, im_shape, gt_bbox, - gt_class, difficult) - """ - ims = [] - keys = sorted(list(sample.keys())) - for k in keys: - if 'image' in k: - ims.append(sample[k]) - if 'im_info' in keys: - im_info = sample['im_info'] - else: - raise KeyError("The dataset doesn't have 'im_info' key.") - im_id = sample['im_id'] - h = sample['h'] - w = sample['w'] - # For rcnn models in eval and infer stage, original image size - # is needed to clip the bounding boxes. And box clip op in - # bbox prediction needs im_info as input in format of [N, 3], - # so im_shape is appended by 1 to match dimension. - im_shape = np.array((h, w, 1), dtype=np.float32) - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - difficult = sample['difficult'] - remain_list = [im_info, im_id, im_shape, gt_bbox, gt_class, difficult] - ims.extend(remain_list) - outs = tuple(ims) - return outs - - -@register_op -class ArrangeTestRCNN(BaseOperator): - """ - Transform dict to the tuple format needed for training. - """ - - def __init__(self): - super(ArrangeTestRCNN, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: - (image, im_info, im_id, im_shape) - """ - ims = [] - keys = sorted(list(sample.keys())) - for k in keys: - if 'image' in k: - ims.append(sample[k]) - if 'im_info' in keys: - im_info = sample['im_info'] - else: - raise KeyError("The dataset doesn't have 'im_info' key.") - im_id = sample['im_id'] - h = sample['h'] - w = sample['w'] - # For rcnn models in eval and infer stage, original image size - # is needed to clip the bounding boxes. And box clip op in - # bbox prediction needs im_info as input in format of [N, 3], - # so im_shape is appended by 1 to match dimension. 
- im_shape = np.array((h, w, 1), dtype=np.float32) - remain_list = [im_info, im_id, im_shape] - ims.extend(remain_list) - outs = tuple(ims) - return outs - - -@register_op -class ArrangeSSD(BaseOperator): - """ - Transform dict to tuple format needed for training. - """ - - def __init__(self): - super(ArrangeSSD, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: - (image, gt_bbox, gt_class, difficult) - """ - im = sample['image'] - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - outs = (im, gt_bbox, gt_class) - return outs - - -@register_op -class ArrangeEvalSSD(BaseOperator): - """ - Transform dict to tuple format needed for training. - """ - - def __init__(self, fields): - super(ArrangeEvalSSD, self).__init__() - self.fields = fields - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: (image) - """ - outs = [] - if len(sample['gt_bbox']) != len(sample['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - for field in self.fields: - if field == 'im_shape': - h = sample['h'] - w = sample['w'] - im_shape = np.array((h, w)) - outs.append(im_shape) - elif field == 'is_difficult': - outs.append(sample['difficult']) - elif field == 'gt_box': - outs.append(sample['gt_bbox']) - elif field == 'gt_label': - outs.append(sample['gt_class']) - else: - outs.append(sample[field]) - - outs = tuple(outs) - - return outs - - -@register_op -class ArrangeTestSSD(BaseOperator): - """ - Transform dict to tuple format needed for training. 
- - Args: - is_mask (bool): whether to use include mask data - """ - - def __init__(self): - super(ArrangeTestSSD, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: (image) - """ - im = sample['image'] - im_id = sample['im_id'] - h = sample['h'] - w = sample['w'] - im_shape = np.array((h, w)) - outs = (im, im_id, im_shape) - return outs - - -@register_op -class ArrangeYOLO(BaseOperator): - """ - Transform dict to the tuple format needed for training. - """ - - def __init__(self): - super(ArrangeYOLO, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: - (image, gt_bbox, gt_class, gt_score, - is_crowd, im_info, gt_masks) - """ - im = sample['image'] - if len(sample['gt_bbox']) != len(sample['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - if len(sample['gt_bbox']) != len(sample['gt_score']): - raise ValueError("gt num mismatch: bbox and score.") - gt_bbox = np.zeros((50, 4), dtype=im.dtype) - gt_class = np.zeros((50, ), dtype=np.int32) - gt_score = np.zeros((50, ), dtype=im.dtype) - gt_num = min(50, len(sample['gt_bbox'])) - if gt_num > 0: - gt_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = sample['gt_class'][:gt_num, 0] - gt_score[:gt_num] = sample['gt_score'][:gt_num, 0] - # parse [x1, y1, x2, y2] to [x, y, w, h] - gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] - gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. - outs = (im, gt_bbox, gt_class, gt_score) - return outs - - -@register_op -class ArrangeEvalYOLO(BaseOperator): - """ - Transform dict to the tuple format needed for evaluation. 
- """ - - def __init__(self): - super(ArrangeEvalYOLO, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. - Returns: - sample: a tuple containing the following items: - (image, im_shape, im_id, gt_bbox, gt_class, - difficult) - """ - im = sample['image'] - if len(sample['gt_bbox']) != len(sample['gt_class']): - raise ValueError("gt num mismatch: bbox and class.") - im_id = sample['im_id'] - h = sample['h'] - w = sample['w'] - im_shape = np.array((h, w)) - gt_bbox = np.zeros((50, 4), dtype=im.dtype) - gt_class = np.zeros((50, ), dtype=np.int32) - difficult = np.zeros((50, ), dtype=np.int32) - gt_num = min(50, len(sample['gt_bbox'])) - if gt_num > 0: - gt_bbox[:gt_num, :] = sample['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = sample['gt_class'][:gt_num, 0] - difficult[:gt_num] = sample['difficult'][:gt_num, 0] - outs = (im, im_shape, im_id, gt_bbox, gt_class, difficult) - return outs - - -@register_op -class ArrangeTestYOLO(BaseOperator): - """ - Transform dict to the tuple format needed for inference. - """ - - def __init__(self): - super(ArrangeTestYOLO, self).__init__() - - def __call__(self, sample, context=None): - """ - Args: - sample: a dict which contains image - info and annotation info. - context: a dict which contains additional info. 
- Returns: - sample: a tuple containing the following items: - (image, gt_bbox, gt_class, gt_score, is_crowd, - im_info, gt_masks) - """ - im = sample['image'] - im_id = sample['im_id'] - h = sample['h'] - w = sample['w'] - im_shape = np.array((h, w)) - outs = (im, im_shape, im_id) - return outs diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/op_helper.py b/PaddleCV/PaddleDetection/ppdet/data/transform/op_helper.py deleted file mode 100644 index 838714f4dda2b664ae4d2b1f3ee343e5b6e50360..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/op_helper.py +++ /dev/null @@ -1,389 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# this file contains helper methods for BBOX processing - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import random -import math -import cv2 - - -def meet_emit_constraint(src_bbox, sample_bbox): - center_x = (src_bbox[2] + src_bbox[0]) / 2 - center_y = (src_bbox[3] + src_bbox[1]) / 2 - if center_x >= sample_bbox[0] and \ - center_x <= sample_bbox[2] and \ - center_y >= sample_bbox[1] and \ - center_y <= sample_bbox[3]: - return True - return False - - -def clip_bbox(src_bbox): - src_bbox[0] = max(min(src_bbox[0], 1.0), 0.0) - src_bbox[1] = max(min(src_bbox[1], 1.0), 0.0) - src_bbox[2] = max(min(src_bbox[2], 1.0), 0.0) - src_bbox[3] = max(min(src_bbox[3], 1.0), 0.0) - return src_bbox - - -def bbox_area(src_bbox): - if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]: - return 0. - else: - width = src_bbox[2] - src_bbox[0] - height = src_bbox[3] - src_bbox[1] - return width * height - - -def is_overlap(object_bbox, sample_bbox): - if object_bbox[0] >= sample_bbox[2] or \ - object_bbox[2] <= sample_bbox[0] or \ - object_bbox[1] >= sample_bbox[3] or \ - object_bbox[3] <= sample_bbox[1]: - return False - else: - return True - - -def filter_and_process(sample_bbox, bboxes, labels, scores=None): - new_bboxes = [] - new_labels = [] - new_scores = [] - for i in range(len(bboxes)): - new_bbox = [0, 0, 0, 0] - obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]] - if not meet_emit_constraint(obj_bbox, sample_bbox): - continue - if not is_overlap(obj_bbox, sample_bbox): - continue - sample_width = sample_bbox[2] - sample_bbox[0] - sample_height = sample_bbox[3] - sample_bbox[1] - new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width - new_bbox[1] = (obj_bbox[1] - sample_bbox[1]) / sample_height - new_bbox[2] = (obj_bbox[2] - sample_bbox[0]) / sample_width - new_bbox[3] = (obj_bbox[3] - sample_bbox[1]) / sample_height - new_bbox = clip_bbox(new_bbox) - if 
bbox_area(new_bbox) > 0: - new_bboxes.append(new_bbox) - new_labels.append([labels[i][0]]) - if scores is not None: - new_scores.append([scores[i][0]]) - bboxes = np.array(new_bboxes) - labels = np.array(new_labels) - scores = np.array(new_scores) - return bboxes, labels, scores - - -def bbox_area_sampling(bboxes, labels, scores, target_size, min_size): - new_bboxes = [] - new_labels = [] - new_scores = [] - for i, bbox in enumerate(bboxes): - w = float((bbox[2] - bbox[0]) * target_size) - h = float((bbox[3] - bbox[1]) * target_size) - if w * h < float(min_size * min_size): - continue - else: - new_bboxes.append(bbox) - new_labels.append(labels[i]) - if scores is not None and scores.size != 0: - new_scores.append(scores[i]) - bboxes = np.array(new_bboxes) - labels = np.array(new_labels) - scores = np.array(new_scores) - return bboxes, labels, scores - - -def generate_sample_bbox(sampler): - scale = np.random.uniform(sampler[2], sampler[3]) - aspect_ratio = np.random.uniform(sampler[4], sampler[5]) - aspect_ratio = max(aspect_ratio, (scale**2.0)) - aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) - bbox_width = scale * (aspect_ratio**0.5) - bbox_height = scale / (aspect_ratio**0.5) - xmin_bound = 1 - bbox_width - ymin_bound = 1 - bbox_height - xmin = np.random.uniform(0, xmin_bound) - ymin = np.random.uniform(0, ymin_bound) - xmax = xmin + bbox_width - ymax = ymin + bbox_height - sampled_bbox = [xmin, ymin, xmax, ymax] - return sampled_bbox - - -def generate_sample_bbox_square(sampler, image_width, image_height): - scale = np.random.uniform(sampler[2], sampler[3]) - aspect_ratio = np.random.uniform(sampler[4], sampler[5]) - aspect_ratio = max(aspect_ratio, (scale**2.0)) - aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) - bbox_width = scale * (aspect_ratio**0.5) - bbox_height = scale / (aspect_ratio**0.5) - if image_height < image_width: - bbox_width = bbox_height * image_height / image_width - else: - bbox_height = bbox_width * image_width / image_height - 
xmin_bound = 1 - bbox_width - ymin_bound = 1 - bbox_height - xmin = np.random.uniform(0, xmin_bound) - ymin = np.random.uniform(0, ymin_bound) - xmax = xmin + bbox_width - ymax = ymin + bbox_height - sampled_bbox = [xmin, ymin, xmax, ymax] - return sampled_bbox - - -def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array, - resize_width): - num_gt = len(bbox_labels) - # np.random.randint range: [low, high) - rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0 - - if num_gt != 0: - norm_xmin = bbox_labels[rand_idx][0] - norm_ymin = bbox_labels[rand_idx][1] - norm_xmax = bbox_labels[rand_idx][2] - norm_ymax = bbox_labels[rand_idx][3] - - xmin = norm_xmin * image_width - ymin = norm_ymin * image_height - wid = image_width * (norm_xmax - norm_xmin) - hei = image_height * (norm_ymax - norm_ymin) - range_size = 0 - - area = wid * hei - for scale_ind in range(0, len(scale_array) - 1): - if area > scale_array[scale_ind] ** 2 and area < \ - scale_array[scale_ind + 1] ** 2: - range_size = scale_ind + 1 - break - - if area > scale_array[len(scale_array) - 2]**2: - range_size = len(scale_array) - 2 - - scale_choose = 0.0 - if range_size == 0: - rand_idx_size = 0 - else: - # np.random.randint range: [low, high) - rng_rand_size = np.random.randint(0, range_size + 1) - rand_idx_size = rng_rand_size % (range_size + 1) - - if rand_idx_size == range_size: - min_resize_val = scale_array[rand_idx_size] / 2.0 - max_resize_val = min(2.0 * scale_array[rand_idx_size], - 2 * math.sqrt(wid * hei)) - scale_choose = random.uniform(min_resize_val, max_resize_val) - else: - min_resize_val = scale_array[rand_idx_size] / 2.0 - max_resize_val = 2.0 * scale_array[rand_idx_size] - scale_choose = random.uniform(min_resize_val, max_resize_val) - - sample_bbox_size = wid * resize_width / scale_choose - - w_off_orig = 0.0 - h_off_orig = 0.0 - if sample_bbox_size < max(image_height, image_width): - if wid <= sample_bbox_size: - w_off_orig = np.random.uniform(xmin + wid - 
sample_bbox_size, - xmin) - else: - w_off_orig = np.random.uniform(xmin, - xmin + wid - sample_bbox_size) - - if hei <= sample_bbox_size: - h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size, - ymin) - else: - h_off_orig = np.random.uniform(ymin, - ymin + hei - sample_bbox_size) - - else: - w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0) - h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0) - - w_off_orig = math.floor(w_off_orig) - h_off_orig = math.floor(h_off_orig) - - # Figure out top left coordinates. - w_off = float(w_off_orig / image_width) - h_off = float(h_off_orig / image_height) - - sampled_bbox = [ - w_off, h_off, w_off + float(sample_bbox_size / image_width), - h_off + float(sample_bbox_size / image_height) - ] - return sampled_bbox - else: - return 0 - - -def jaccard_overlap(sample_bbox, object_bbox): - if sample_bbox[0] >= object_bbox[2] or \ - sample_bbox[2] <= object_bbox[0] or \ - sample_bbox[1] >= object_bbox[3] or \ - sample_bbox[3] <= object_bbox[1]: - return 0 - intersect_xmin = max(sample_bbox[0], object_bbox[0]) - intersect_ymin = max(sample_bbox[1], object_bbox[1]) - intersect_xmax = min(sample_bbox[2], object_bbox[2]) - intersect_ymax = min(sample_bbox[3], object_bbox[3]) - intersect_size = (intersect_xmax - intersect_xmin) * ( - intersect_ymax - intersect_ymin) - sample_bbox_size = bbox_area(sample_bbox) - object_bbox_size = bbox_area(object_bbox) - overlap = intersect_size / ( - sample_bbox_size + object_bbox_size - intersect_size) - return overlap - - -def intersect_bbox(bbox1, bbox2): - if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \ - bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]: - intersection_box = [0.0, 0.0, 0.0, 0.0] - else: - intersection_box = [ - max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]), - min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3]) - ] - return intersection_box - - -def bbox_coverage(bbox1, bbox2): - inter_box = intersect_bbox(bbox1, bbox2) - intersect_size = 
bbox_area(inter_box) - - if intersect_size > 0: - bbox1_size = bbox_area(bbox1) - return intersect_size / bbox1_size - else: - return 0. - - -def satisfy_sample_constraint(sampler, - sample_bbox, - gt_bboxes, - satisfy_all=False): - if sampler[6] == 0 and sampler[7] == 0: - return True - satisfied = [] - for i in range(len(gt_bboxes)): - object_bbox = [ - gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3] - ] - overlap = jaccard_overlap(sample_bbox, object_bbox) - if sampler[6] != 0 and \ - overlap < sampler[6]: - satisfied.append(False) - continue - if sampler[7] != 0 and \ - overlap > sampler[7]: - satisfied.append(False) - continue - satisfied.append(True) - if not satisfy_all: - return True - - if satisfy_all: - return np.all(satisfied) - else: - return False - - -def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes): - if sampler[6] == 0 and sampler[7] == 0: - has_jaccard_overlap = False - else: - has_jaccard_overlap = True - if sampler[8] == 0 and sampler[9] == 0: - has_object_coverage = False - else: - has_object_coverage = True - - if not has_jaccard_overlap and not has_object_coverage: - return True - found = False - for i in range(len(gt_bboxes)): - object_bbox = [ - gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3] - ] - if has_jaccard_overlap: - overlap = jaccard_overlap(sample_bbox, object_bbox) - if sampler[6] != 0 and \ - overlap < sampler[6]: - continue - if sampler[7] != 0 and \ - overlap > sampler[7]: - continue - found = True - if has_object_coverage: - object_coverage = bbox_coverage(object_bbox, sample_bbox) - if sampler[8] != 0 and \ - object_coverage < sampler[8]: - continue - if sampler[9] != 0 and \ - object_coverage > sampler[9]: - continue - found = True - if found: - return True - return found - - -def crop_image_sampling(img, sample_bbox, image_width, image_height, - target_size): - # no clipping here - xmin = int(sample_bbox[0] * image_width) - xmax = int(sample_bbox[2] * image_width) 
- ymin = int(sample_bbox[1] * image_height) - ymax = int(sample_bbox[3] * image_height) - - w_off = xmin - h_off = ymin - width = xmax - xmin - height = ymax - ymin - cross_xmin = max(0.0, float(w_off)) - cross_ymin = max(0.0, float(h_off)) - cross_xmax = min(float(w_off + width - 1.0), float(image_width)) - cross_ymax = min(float(h_off + height - 1.0), float(image_height)) - cross_width = cross_xmax - cross_xmin - cross_height = cross_ymax - cross_ymin - - roi_xmin = 0 if w_off >= 0 else abs(w_off) - roi_ymin = 0 if h_off >= 0 else abs(h_off) - roi_width = cross_width - roi_height = cross_height - - roi_y1 = int(roi_ymin) - roi_y2 = int(roi_ymin + roi_height) - roi_x1 = int(roi_xmin) - roi_x2 = int(roi_xmin + roi_width) - - cross_y1 = int(cross_ymin) - cross_y2 = int(cross_ymin + cross_height) - cross_x1 = int(cross_xmin) - cross_x2 = int(cross_xmin + cross_width) - - sample_img = np.zeros((height, width, 3)) - sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \ - img[cross_y1: cross_y2, cross_x1: cross_x2] - - sample_img = cv2.resize( - sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA) - - return sample_img diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/operators.py b/PaddleCV/PaddleDetection/ppdet/data/transform/operators.py deleted file mode 100644 index b09998120ffc1e76e42299489f16bacc6424454c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/operators.py +++ /dev/null @@ -1,991 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# operators to process sample, -# eg: decode/resize/crop image - -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import uuid -import logging -import random -import math -import numpy as np -import cv2 -from PIL import Image, ImageEnhance - -from ppdet.core.workspace import serializable - -from .op_helper import (satisfy_sample_constraint, filter_and_process, - generate_sample_bbox, clip_bbox, data_anchor_sampling, - satisfy_sample_constraint_coverage, crop_image_sampling, - generate_sample_bbox_square, bbox_area_sampling) - -logger = logging.getLogger(__name__) - -registered_ops = [] - - -def register_op(cls): - registered_ops.append(cls.__name__) - if not hasattr(BaseOperator, cls.__name__): - setattr(BaseOperator, cls.__name__, cls) - else: - raise KeyError("The {} class has been registered.".format(cls.__name__)) - return serializable(cls) - - -class BboxError(ValueError): - pass - - -class ImageError(ValueError): - pass - - -class BaseOperator(object): - def __init__(self, name=None): - if name is None: - name = self.__class__.__name__ - self._id = name + '_' + str(uuid.uuid4())[-6:] - - def __call__(self, sample, context=None): - """ Process a sample. - Args: - sample (dict): a dict of sample, eg: {'image':xx, 'label': xxx} - context (dict): info about this sample processing - Returns: - result (dict): a processed sample - """ - return sample - - def __str__(self): - return str(self._id) - - -@register_op -class DecodeImage(BaseOperator): - def __init__(self, to_rgb=True, with_mixup=False): - """ Transform the image data to numpy format. 
- - Args: - to_rgb (bool): whether to convert BGR to RGB - with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score - """ - - super(DecodeImage, self).__init__() - self.to_rgb = to_rgb - self.with_mixup = with_mixup - if not isinstance(self.to_rgb, bool): - raise TypeError("{}: input type is invalid.".format(self)) - if not isinstance(self.with_mixup, bool): - raise TypeError("{}: input type is invalid.".format(self)) - - def __call__(self, sample, context=None): - """ load image if 'im_file' field is not empty but 'image' is""" - if 'image' not in sample: - with open(sample['im_file'], 'rb') as f: - sample['image'] = f.read() - - im = sample['image'] - data = np.frombuffer(im, dtype='uint8') - im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - sample['image'] = im - - if 'h' not in sample: - sample['h'] = im.shape[0] - if 'w' not in sample: - sample['w'] = im.shape[1] - # make default im_info with [h, w, 1] - sample['im_info'] = np.array( - [im.shape[0], im.shape[1], 1.], dtype=np.float32) - # decode mixup image - if self.with_mixup and 'mixup' in sample: - self.__call__(sample['mixup'], context) - return sample - - -@register_op -class MultiscaleTestResize(BaseOperator): - def __init__(self, - origin_target_size=800, - origin_max_size=1333, - target_size=[], - max_size=2000, - interp=cv2.INTER_LINEAR, - use_flip=True): - """ - Rescale image to the each size in target size, and capped at max_size. - - Args: - origin_target_size(int): original target size of image's short side. - origin_max_size(int): original max size of image. - target_size (list): A list of target sizes of image's short side. - max_size (int): the max size of image. - interp (int): the interpolation method. - use_flip (bool): whether use flip augmentation. 
- """ - super(MultiscaleTestResize, self).__init__() - self.origin_target_size = int(origin_target_size) - self.origin_max_size = int(origin_max_size) - self.max_size = int(max_size) - self.interp = int(interp) - self.use_flip = use_flip - - if not isinstance(target_size, list): - raise TypeError( - "Type of target_size is invalid. Must be List, now is {}". - format(type(target_size))) - self.target_size = target_size - if not (isinstance(self.origin_target_size, int) and isinstance( - self.origin_max_size, int) and isinstance(self.max_size, int) - and isinstance(self.interp, int)): - raise TypeError("{}: input type is invalid.".format(self)) - - def __call__(self, sample, context=None): - """ Resize the image numpy for multi-scale test. - """ - origin_ims = {} - im = sample['image'] - if not isinstance(im, np.ndarray): - raise TypeError("{}: image type is not numpy.".format(self)) - if len(im.shape) != 3: - raise ImageError('{}: image is not 3-dimensional.'.format(self)) - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('{}: min size of image is 0'.format(self)) - base_name_list = ['image'] - origin_ims['image'] = im - if self.use_flip: - sample['flip_image'] = im[:, ::-1, :] - base_name_list.append('flip_image') - origin_ims['flip_image'] = sample['flip_image'] - im_info = [] - for base_name in base_name_list: - im_scale = float(self.origin_target_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > self.origin_max_size: - im_scale = float(self.origin_max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - - resize_w = np.round(im_scale_x * float(im_shape[1])) - resize_h = np.round(im_scale_y * float(im_shape[0])) - im_resize = cv2.resize( - origin_ims[base_name], - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - 
im_info.extend([resize_h, resize_w, im_scale]) - sample[base_name] = im_resize - for i, size in enumerate(self.target_size): - im_scale = float(size) / float(im_size_min) - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - resize_w = np.round(im_scale_x * float(im_shape[1])) - resize_h = np.round(im_scale_y * float(im_shape[0])) - im_resize = cv2.resize( - origin_ims[base_name], - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - im_info.extend([resize_h, resize_w, im_scale]) - name = base_name + '_scale_' + str(i) - sample[name] = im_resize - sample['im_info'] = np.array(im_info, dtype=np.float32) - return sample - - -@register_op -class ResizeImage(BaseOperator): - def __init__(self, - target_size=0, - max_size=0, - interp=cv2.INTER_LINEAR, - use_cv2=True): - """ - Rescale image to the specified target size, and capped at max_size - if max_size != 0. - If target_size is list, selected a scale randomly as the specified - target size. - - Args: - target_size (int|list): the target size of image's short side, - multi-scale training is adopted when type is list. - max_size (int): the max size of image - interp (int): the interpolation method - use_cv2 (bool): use the cv2 interpolation method or use PIL - interpolation method - """ - super(ResizeImage, self).__init__() - self.max_size = int(max_size) - self.interp = int(interp) - self.use_cv2 = use_cv2 - if not (isinstance(target_size, int) or isinstance(target_size, list)): - raise TypeError( - "Type of target_size is invalid. Must be Integer or List, now is {}". - format(type(target_size))) - self.target_size = target_size - if not (isinstance(self.max_size, int) and isinstance(self.interp, - int)): - raise TypeError("{}: input type is invalid.".format(self)) - - def __call__(self, sample, context=None): - """ Resize the image numpy. 
- """ - im = sample['image'] - if not isinstance(im, np.ndarray): - raise TypeError("{}: image type is not numpy.".format(self)) - if len(im.shape) != 3: - raise ImageError('{}: image is not 3-dimensional.'.format(self)) - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if isinstance(self.target_size, list): - # Case for multi-scale training - selected_size = random.choice(self.target_size) - else: - selected_size = self.target_size - if float(im_size_min) == 0: - raise ZeroDivisionError('{}: min size of image is 0'.format(self)) - if self.max_size != 0: - im_scale = float(selected_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - - resize_w = np.round(im_scale_x * float(im_shape[1])) - resize_h = np.round(im_scale_y * float(im_shape[0])) - im_info = [resize_h, resize_w, im_scale] - if 'im_info' in sample and sample['im_info'][2] != 1.: - sample['im_info'] = np.append( - list(sample['im_info']), im_info).astype(np.float32) - else: - sample['im_info'] = np.array(im_info).astype(np.float32) - else: - im_scale_x = float(selected_size) / float(im_shape[1]) - im_scale_y = float(selected_size) / float(im_shape[0]) - - resize_w = selected_size - resize_h = selected_size - - if self.use_cv2: - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - else: - im = Image.fromarray(im) - im = im.resize((resize_w, resize_h), self.interp) - im = np.array(im) - - sample['image'] = im - return sample - - -@register_op -class RandomFlipImage(BaseOperator): - def __init__(self, prob=0.5, is_normalized=False, is_mask_flip=False): - """ - Args: - prob (float): the probability of flipping image - is_normalized (bool): whether the bbox scale to [0,1] - is_mask_flip (bool): whether flip the 
segmentation - """ - super(RandomFlipImage, self).__init__() - self.prob = prob - self.is_normalized = is_normalized - self.is_mask_flip = is_mask_flip - if not (isinstance(self.prob, float) and - isinstance(self.is_normalized, bool) and - isinstance(self.is_mask_flip, bool)): - raise TypeError("{}: input type is invalid.".format(self)) - - def flip_segms(self, segms, height, width): - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1, :] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - def is_poly(segm): - assert isinstance(segm, (list, dict)), \ - "Invalid segm type: {}".format(type(segm)) - return isinstance(segm, list) - - flipped_segms = [] - for segm in segms: - if is_poly(segm): - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - import pycocotools.mask as mask_util - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms - - def __call__(self, sample, context=None): - """Filp the image and bounding box. - Operators: - 1. Flip the image numpy. - 2. Transform the bboxes' x coordinates. - (Must judge whether the coordinates are normalized!) - 3. Transform the segmentations' x coordinates. - (Must judge whether the coordinates are normalized!) - Output: - sample: the image, bounding box and segmentation part - in sample are flipped. 
- """ - gt_bbox = sample['gt_bbox'] - im = sample['image'] - if not isinstance(im, np.ndarray): - raise TypeError("{}: image is not a numpy array.".format(self)) - if len(im.shape) != 3: - raise ImageError("{}: image is not 3-dimensional.".format(self)) - height, width, _ = im.shape - if np.random.uniform(0, 1) < self.prob: - im = im[:, ::-1, :] - if gt_bbox.shape[0] == 0: - return sample - oldx1 = gt_bbox[:, 0].copy() - oldx2 = gt_bbox[:, 2].copy() - if self.is_normalized: - gt_bbox[:, 0] = 1 - oldx2 - gt_bbox[:, 2] = 1 - oldx1 - else: - gt_bbox[:, 0] = width - oldx2 - 1 - gt_bbox[:, 2] = width - oldx1 - 1 - if gt_bbox.shape[0] != 0 and (gt_bbox[:, 2] < gt_bbox[:, 0]).all(): - m = "{}: invalid box, x2 should be greater than x1".format(self) - raise BboxError(m) - sample['gt_bbox'] = gt_bbox - if self.is_mask_flip and len(sample['gt_poly']) != 0: - sample['gt_poly'] = self.flip_segms(sample['gt_poly'], height, - width) - sample['flipped'] = True - sample['image'] = im - return sample - - -@register_op -class NormalizeImage(BaseOperator): - def __init__(self, - mean=[0.485, 0.456, 0.406], - std=[1, 1, 1], - is_scale=True, - is_channel_first=True): - """ - Args: - mean (list): the pixel mean - std (list): the pixel variance - """ - super(NormalizeImage, self).__init__() - self.mean = mean - self.std = std - self.is_scale = is_scale - self.is_channel_first = is_channel_first - if not (isinstance(self.mean, list) and isinstance(self.std, list) and - isinstance(self.is_scale, bool)): - raise TypeError("{}: input type is invalid.".format(self)) - from functools import reduce - if reduce(lambda x, y: x * y, self.std) == 0: - raise ValueError('{}: std is invalid!'.format(self)) - - def __call__(self, sample, context=None): - """Normalize the image. - Operators: - 1.(optional) Scale the image to [0,1] - 2. 
Each pixel minus mean and is divided by std - """ - for k in sample.keys(): - if 'image' in k: - im = sample[k] - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - sample[k] = im - return sample - - -@register_op -class RandomDistort(BaseOperator): - def __init__(self, - brightness_lower=0.5, - brightness_upper=1.5, - contrast_lower=0.5, - contrast_upper=1.5, - saturation_lower=0.5, - saturation_upper=1.5, - hue_lower=-18, - hue_upper=18, - brightness_prob=0.5, - contrast_prob=0.5, - saturation_prob=0.5, - hue_prob=0.5, - count=4, - is_order=False): - """ - Args: - brightness_lower/ brightness_upper (float): the brightness - between brightness_lower and brightness_upper - contrast_lower/ contrast_upper (float): the contrast between - contrast_lower and contrast_lower - saturation_lower/ saturation_upper (float): the saturation - between saturation_lower and saturation_upper - hue_lower/ hue_upper (float): the hue between - hue_lower and hue_upper - brightness_prob (float): the probability of changing brightness - contrast_prob (float): the probability of changing contrast - saturation_prob (float): the probability of changing saturation - hue_prob (float): the probability of changing hue - count (int): the kinds of doing distrot - is_order (bool): whether determine the order of distortion - """ - super(RandomDistort, self).__init__() - self.brightness_lower = brightness_lower - self.brightness_upper = brightness_upper - self.contrast_lower = contrast_lower - self.contrast_upper = contrast_upper - self.saturation_lower = saturation_lower - self.saturation_upper = saturation_upper - self.hue_lower = hue_lower - self.hue_upper = hue_upper - self.brightness_prob = 
brightness_prob - self.contrast_prob = contrast_prob - self.saturation_prob = saturation_prob - self.hue_prob = hue_prob - self.count = count - self.is_order = is_order - - def random_brightness(self, img): - brightness_delta = np.random.uniform(self.brightness_lower, - self.brightness_upper) - prob = np.random.uniform(0, 1) - if prob < self.brightness_prob: - img = ImageEnhance.Brightness(img).enhance(brightness_delta) - return img - - def random_contrast(self, img): - contrast_delta = np.random.uniform(self.contrast_lower, - self.contrast_upper) - prob = np.random.uniform(0, 1) - if prob < self.contrast_prob: - img = ImageEnhance.Contrast(img).enhance(contrast_delta) - return img - - def random_saturation(self, img): - saturation_delta = np.random.uniform(self.saturation_lower, - self.saturation_upper) - prob = np.random.uniform(0, 1) - if prob < self.saturation_prob: - img = ImageEnhance.Color(img).enhance(saturation_delta) - return img - - def random_hue(self, img): - hue_delta = np.random.uniform(self.hue_lower, self.hue_upper) - prob = np.random.uniform(0, 1) - if prob < self.hue_prob: - img = np.array(img.convert('HSV')) - img[:, :, 0] = img[:, :, 0] + hue_delta - img = Image.fromarray(img, mode='HSV').convert('RGB') - return img - - def __call__(self, sample, context): - """random distort the image""" - ops = [ - self.random_brightness, self.random_contrast, - self.random_saturation, self.random_hue - ] - if self.is_order: - prob = np.random.uniform(0, 1) - if prob < 0.5: - ops = [ - self.random_brightness, - self.random_saturation, - self.random_hue, - self.random_contrast, - ] - else: - ops = random.sample(ops, self.count) - assert 'image' in sample, "image data not found" - im = sample['image'] - im = Image.fromarray(im) - for id in range(self.count): - im = ops[id](im) - im = np.asarray(im) - sample['image'] = im - return sample - - -@register_op -class ExpandImage(BaseOperator): - def __init__(self, max_ratio, prob, mean=[127.5, 127.5, 127.5]): - """ - 
Args: - max_ratio (float): the ratio of expanding - prob (float): the probability of expanding image - mean (list): the pixel mean - """ - super(ExpandImage, self).__init__() - self.max_ratio = max_ratio - self.mean = mean - self.prob = prob - - def __call__(self, sample, context): - """ - Expand the image and modify bounding box. - Operators: - 1. Scale the image width and height. - 2. Construct new images with new height and width. - 3. Fill the new image with the mean. - 4. Put original imge into new image. - 5. Rescale the bounding box. - 6. Determine if the new bbox is satisfied in the new image. - Returns: - sample: the image, bounding box are replaced. - """ - - prob = np.random.uniform(0, 1) - assert 'image' in sample, 'not found image data' - im = sample['image'] - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - im_width = sample['w'] - im_height = sample['h'] - if prob < self.prob: - if self.max_ratio - 1 >= 0.01: - expand_ratio = np.random.uniform(1, self.max_ratio) - height = int(im_height * expand_ratio) - width = int(im_width * expand_ratio) - h_off = math.floor(np.random.uniform(0, height - im_height)) - w_off = math.floor(np.random.uniform(0, width - im_width)) - expand_bbox = [ - -w_off / im_width, -h_off / im_height, - (width - w_off) / im_width, (height - h_off) / im_height - ] - expand_im = np.ones((height, width, 3)) - expand_im = np.uint8(expand_im * np.squeeze(self.mean)) - expand_im = Image.fromarray(expand_im) - im = Image.fromarray(im) - expand_im.paste(im, (int(w_off), int(h_off))) - expand_im = np.asarray(expand_im) - gt_bbox, gt_class, _ = filter_and_process(expand_bbox, gt_bbox, - gt_class) - sample['image'] = expand_im - sample['gt_bbox'] = gt_bbox - sample['gt_class'] = gt_class - sample['w'] = width - sample['h'] = height - - return sample - - -@register_op -class CropImage(BaseOperator): - def __init__(self, batch_sampler, satisfy_all=False, avoid_no_bbox=True): - """ - Args: - batch_sampler (list): Multiple sets of 
different - parameters for cropping. - satisfy_all (bool): whether all boxes must satisfy. - e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0], - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]] - [max sample, max trial, min scale, max scale, - min aspect ratio, max aspect ratio, - min overlap, max overlap] - avoid_no_bbox (bool): whether to to avoid the - situation where the box does not appear. - """ - super(CropImage, self).__init__() - self.batch_sampler = batch_sampler - self.satisfy_all = satisfy_all - self.avoid_no_bbox = avoid_no_bbox - - def __call__(self, sample, context): - """ - Crop the image and modify bounding box. - Operators: - 1. Scale the image width and height. - 2. Crop the image according to a radom sample. - 3. Rescale the bounding box. - 4. Determine if the new bbox is satisfied in the new image. - Returns: - sample: the image, bounding box are replaced. 
- """ - assert 'image' in sample, "image data not found" - im = sample['image'] - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - im_width = sample['w'] - im_height = sample['h'] - gt_score = None - if 'gt_score' in sample: - gt_score = sample['gt_score'] - sampled_bbox = [] - gt_bbox = gt_bbox.tolist() - for sampler in self.batch_sampler: - found = 0 - for i in range(sampler[1]): - if found >= sampler[0]: - break - sample_bbox = generate_sample_bbox(sampler) - if satisfy_sample_constraint(sampler, sample_bbox, gt_bbox, - self.satisfy_all): - sampled_bbox.append(sample_bbox) - found = found + 1 - im = np.array(im) - while sampled_bbox: - idx = int(np.random.uniform(0, len(sampled_bbox))) - sample_bbox = sampled_bbox.pop(idx) - sample_bbox = clip_bbox(sample_bbox) - crop_bbox, crop_class, crop_score = \ - filter_and_process(sample_bbox, gt_bbox, gt_class, gt_score) - if self.avoid_no_bbox: - if len(crop_bbox) < 1: - continue - xmin = int(sample_bbox[0] * im_width) - xmax = int(sample_bbox[2] * im_width) - ymin = int(sample_bbox[1] * im_height) - ymax = int(sample_bbox[3] * im_height) - im = im[ymin:ymax, xmin:xmax] - sample['image'] = im - sample['gt_bbox'] = crop_bbox - sample['gt_class'] = crop_class - sample['gt_score'] = crop_score - return sample - return sample - - -@register_op -class CropImageWithDataAchorSampling(BaseOperator): - def __init__(self, - batch_sampler, - anchor_sampler=None, - target_size=None, - das_anchor_scales=[16, 32, 64, 128], - sampling_prob=0.5, - min_size=8., - avoid_no_bbox=True): - """ - Args: - anchor_sampler (list): anchor_sampling sets of different - parameters for cropping. - batch_sampler (list): Multiple sets of different - parameters for cropping. 
- e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]] - [[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], - [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]] - [max sample, max trial, min scale, max scale, - min aspect ratio, max aspect ratio, - min overlap, max overlap, min coverage, max coverage] - target_size (bool): target image size. - das_anchor_scales (list[float]): a list of anchor scales in data - anchor smapling. - min_size (float): minimum size of sampled bbox. - avoid_no_bbox (bool): whether to to avoid the - situation where the box does not appear. - """ - super(CropImageWithDataAchorSampling, self).__init__() - self.anchor_sampler = anchor_sampler - self.batch_sampler = batch_sampler - self.target_size = target_size - self.sampling_prob = sampling_prob - self.min_size = min_size - self.avoid_no_bbox = avoid_no_bbox - self.das_anchor_scales = np.array(das_anchor_scales) - - def __call__(self, sample, context): - """ - Crop the image and modify bounding box. - Operators: - 1. Scale the image width and height. - 2. Crop the image according to a radom sample. - 3. Rescale the bounding box. - 4. Determine if the new bbox is satisfied in the new image. - Returns: - sample: the image, bounding box are replaced. - """ - assert 'image' in sample, "image data not found" - im = sample['image'] - gt_bbox = sample['gt_bbox'] - gt_class = sample['gt_class'] - image_width = sample['w'] - image_height = sample['h'] - gt_score = None - if 'gt_score' in sample: - gt_score = sample['gt_score'] - sampled_bbox = [] - gt_bbox = gt_bbox.tolist() - - prob = np.random.uniform(0., 1.) 
- if prob > self.sampling_prob: # anchor sampling - assert self.anchor_sampler - for sampler in self.anchor_sampler: - found = 0 - for i in range(sampler[1]): - if found >= sampler[0]: - break - sample_bbox = data_anchor_sampling( - gt_bbox, image_width, image_height, - self.das_anchor_scales, self.target_size) - if sample_bbox == 0: - break - if satisfy_sample_constraint_coverage(sampler, sample_bbox, - gt_bbox): - sampled_bbox.append(sample_bbox) - found = found + 1 - im = np.array(im) - while sampled_bbox: - idx = int(np.random.uniform(0, len(sampled_bbox))) - sample_bbox = sampled_bbox.pop(idx) - - crop_bbox, crop_class, crop_score = filter_and_process( - sample_bbox, gt_bbox, gt_class, gt_score) - crop_bbox, crop_class, crop_score = bbox_area_sampling( - crop_bbox, crop_class, crop_score, self.target_size, - self.min_size) - - if self.avoid_no_bbox: - if len(crop_bbox) < 1: - continue - im = crop_image_sampling(im, sample_bbox, image_width, - image_height, self.target_size) - sample['image'] = im - sample['gt_bbox'] = crop_bbox - sample['gt_class'] = crop_class - sample['gt_score'] = crop_score - return sample - return sample - - else: - for sampler in self.batch_sampler: - found = 0 - for i in range(sampler[1]): - if found >= sampler[0]: - break - sample_bbox = generate_sample_bbox_square( - sampler, image_width, image_height) - if satisfy_sample_constraint_coverage(sampler, sample_bbox, - gt_bbox): - sampled_bbox.append(sample_bbox) - found = found + 1 - im = np.array(im) - while sampled_bbox: - idx = int(np.random.uniform(0, len(sampled_bbox))) - sample_bbox = sampled_bbox.pop(idx) - sample_bbox = clip_bbox(sample_bbox) - - crop_bbox, crop_class, crop_score = filter_and_process( - sample_bbox, gt_bbox, gt_class, gt_score) - # sampling bbox according the bbox area - crop_bbox, crop_class, crop_score = bbox_area_sampling( - crop_bbox, crop_class, crop_score, self.target_size, - self.min_size) - - if self.avoid_no_bbox: - if len(crop_bbox) < 1: - continue - 
xmin = int(sample_bbox[0] * image_width) - xmax = int(sample_bbox[2] * image_width) - ymin = int(sample_bbox[1] * image_height) - ymax = int(sample_bbox[3] * image_height) - im = im[ymin:ymax, xmin:xmax] - sample['image'] = im - sample['gt_bbox'] = crop_bbox - sample['gt_class'] = crop_class - sample['gt_score'] = crop_score - return sample - return sample - - -@register_op -class NormalizeBox(BaseOperator): - """Transform the bounding box's coornidates to [0,1].""" - - def __init__(self): - super(NormalizeBox, self).__init__() - - def __call__(self, sample, context): - gt_bbox = sample['gt_bbox'] - width = sample['w'] - height = sample['h'] - for i in range(gt_bbox.shape[0]): - gt_bbox[i][0] = gt_bbox[i][0] / width - gt_bbox[i][1] = gt_bbox[i][1] / height - gt_bbox[i][2] = gt_bbox[i][2] / width - gt_bbox[i][3] = gt_bbox[i][3] / height - sample['gt_bbox'] = gt_bbox - return sample - - -@register_op -class Permute(BaseOperator): - def __init__(self, to_bgr=True, channel_first=True): - """ - Change the channel. 
- Args: - to_bgr (bool): confirm whether to convert RGB to BGR - channel_first (bool): confirm whether to change channel - - """ - super(Permute, self).__init__() - self.to_bgr = to_bgr - self.channel_first = channel_first - if not (isinstance(self.to_bgr, bool) and - isinstance(self.channel_first, bool)): - raise TypeError("{}: input type is invalid.".format(self)) - - def __call__(self, sample, context=None): - assert 'image' in sample, "image data not found" - for k in sample.keys(): - if 'image' in k: - im = sample[k] - if self.channel_first: - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - if self.to_bgr: - im = im[[2, 1, 0], :, :] - sample[k] = im - return sample - - -@register_op -class MixupImage(BaseOperator): - def __init__(self, alpha=1.5, beta=1.5): - """ Mixup image and gt_bbbox/gt_score - Args: - alpha (float): alpha parameter of beta distribute - beta (float): beta parameter of beta distribute - """ - super(MixupImage, self).__init__() - self.alpha = alpha - self.beta = beta - if self.alpha <= 0.0: - raise ValueError("alpha shold be positive in {}".format(self)) - if self.beta <= 0.0: - raise ValueError("beta shold be positive in {}".format(self)) - - def _mixup_img(self, img1, img2, factor): - h = max(img1.shape[0], img2.shape[0]) - w = max(img1.shape[1], img2.shape[1]) - img = np.zeros((h, w, img1.shape[2]), 'float32') - img[:img1.shape[0], :img1.shape[1], :] = \ - img1.astype('float32') * factor - img[:img2.shape[0], :img2.shape[1], :] += \ - img2.astype('float32') * (1.0 - factor) - return img.astype('uint8') - - def __call__(self, sample, context=None): - if 'mixup' not in sample: - return sample - factor = np.random.beta(self.alpha, self.beta) - factor = max(0.0, min(1.0, factor)) - if factor >= 1.0: - sample.pop('mixup') - return sample - if factor <= 0.0: - return sample['mixup'] - im = self._mixup_img(sample['image'], sample['mixup']['image'], factor) - gt_bbox1 = sample['gt_bbox'] - gt_bbox2 = sample['mixup']['gt_bbox'] - gt_bbox 
= np.concatenate((gt_bbox1, gt_bbox2), axis=0) - gt_class1 = sample['gt_class'] - gt_class2 = sample['mixup']['gt_class'] - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - - gt_score1 = sample['gt_score'] - gt_score2 = sample['mixup']['gt_score'] - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) - sample['image'] = im - sample['gt_bbox'] = gt_bbox - sample['gt_score'] = gt_score - sample['gt_class'] = gt_class - sample['h'] = im.shape[0] - sample['w'] = im.shape[1] - sample.pop('mixup') - return sample - - -@register_op -class RandomInterpImage(BaseOperator): - def __init__(self, target_size=0, max_size=0): - """ - Random reisze image by multiply interpolate method. - Args: - target_size (int): the taregt size of image's short side - max_size (int): the max size of image - """ - super(RandomInterpImage, self).__init__() - self.target_size = target_size - self.max_size = max_size - if not (isinstance(self.target_size, int) and - isinstance(self.max_size, int)): - raise TypeError('{}: input type is invalid.'.format(self)) - interps = [ - cv2.INTER_NEAREST, - cv2.INTER_LINEAR, - cv2.INTER_AREA, - cv2.INTER_CUBIC, - cv2.INTER_LANCZOS4, - ] - self.resizers = [] - for interp in interps: - self.resizers.append(ResizeImage(target_size, max_size, interp)) - - def __call__(self, sample, context=None): - """Resise the image numpy by random resizer.""" - resizer = random.choice(self.resizers) - return resizer(sample, context) diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/parallel_map.py b/PaddleCV/PaddleDetection/ppdet/data/transform/parallel_map.py deleted file mode 100644 index 2ba55a8d203cfa7f467e1790b372cf6ce6e02fdb..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/parallel_map.py +++ /dev/null @@ -1,225 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# function: -# transform samples in 'source' using 'mapper' - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -import six -import uuid -import logging -import signal -import threading -from .transformer import ProxiedDataset - -logger = logging.getLogger(__name__) - - -class EndSignal(object): - def __init__(self, errno=0, errmsg=''): - self.errno = errno - self.errmsg = errmsg - - -class ParallelMappedDataset(ProxiedDataset): - """ - Transform samples to mapped samples which is similar to 'basic.MappedDataset', - but multiple workers (threads or processes) will be used - - Notes: - this class is not thread-safe - """ - - def __init__(self, source, mapper, worker_args): - super(ParallelMappedDataset, self).__init__(source) - worker_args = {k.lower(): v for k, v in worker_args.items()} - - args = {'bufsize': 100, 'worker_num': 8, - 'use_process': False, 'memsize': '3G'} - args.update(worker_args) - if args['use_process'] and type(args['memsize']) is str: - assert args['memsize'][-1].lower() == 'g', \ - "invalid param for memsize[%s], should be ended with 'G' or 'g'" % (args['memsize']) - gb = args['memsize'][:-1] - args['memsize'] = int(gb) * 1024 ** 3 - - self._worker_args = args - self._started = False - self._source = source - self._mapper = mapper - self._exit = False - self._setup() - - def _setup(self): - """setup input/output queues and 
workers """ - use_process = self._worker_args.get('use_process', False) - if use_process and sys.platform == "win32": - logger.info("Use multi-thread reader instead of " - "multi-process reader on Windows.") - use_process = False - - bufsize = self._worker_args['bufsize'] - if use_process: - from .shared_queue import SharedQueue as Queue - from multiprocessing import Process as Worker - from multiprocessing import Event - memsize = self._worker_args['memsize'] - self._inq = Queue(bufsize, memsize=memsize) - self._outq = Queue(bufsize, memsize=memsize) - else: - if six.PY3: - from queue import Queue - else: - from Queue import Queue - from threading import Thread as Worker - from threading import Event - self._inq = Queue(bufsize) - self._outq = Queue(bufsize) - - consumer_num = self._worker_args['worker_num'] - id = str(uuid.uuid4())[-3:] - self._producer = threading.Thread( - target=self._produce, - args=('producer-' + id, self._source, self._inq)) - self._producer.daemon = True - - self._consumers = [] - for i in range(consumer_num): - p = Worker( - target=self._consume, - args=('consumer-' + id + '_' + str(i), self._inq, self._outq, - self._mapper)) - self._consumers.append(p) - p.daemon = True - - self._epoch = -1 - self._feeding_ev = Event() - self._produced = 0 # produced sample in self._produce - self._consumed = 0 # consumed sample in self.next - self._stopped_consumers = 0 - - def _produce(self, id, source, inq): - """Fetch data from source and feed it to 'inq' queue""" - while True: - self._feeding_ev.wait() - if self._exit: - break - try: - inq.put(source.next()) - self._produced += 1 - except StopIteration: - self._feeding_ev.clear() - self._feeding_ev.wait() # wait other guy to wake up me - logger.debug("producer[{}] starts new epoch".format(id)) - except Exception as e: - msg = "producer[{}] failed with error: {}".format(id, str(e)) - inq.put(EndSignal(-1, msg)) - break - - logger.debug("producer[{}] exits".format(id)) - - def _consume(self, id, inq, 
outq, mapper): - """Fetch data from 'inq', process it and put result to 'outq'""" - while True: - sample = inq.get() - if isinstance(sample, EndSignal): - sample.errmsg += "[consumer[{}] exits]".format(id) - outq.put(sample) - logger.debug("end signal received, " + - "consumer[{}] exits".format(id)) - break - - try: - result = mapper(sample) - outq.put(result) - except Exception as e: - msg = 'failed to map consumer[%s], error: {}'.format(str(e), id) - outq.put(EndSignal(-1, msg)) - break - - def drained(self): - assert self._epoch >= 0, "first epoch has not started yet" - return self._source.drained() and self._produced == self._consumed - - def stop(self): - """ notify to exit - """ - self._exit = True - self._feeding_ev.set() - for _ in range(len(self._consumers)): - self._inq.put(EndSignal(0, "notify consumers to exit")) - - def next(self): - """ get next transformed sample - """ - if self._epoch < 0: - self.reset() - - if self.drained(): - raise StopIteration() - - while True: - sample = self._outq.get() - if isinstance(sample, EndSignal): - self._stopped_consumers += 1 - if sample.errno != 0: - logger.warn("consumer failed with error: {}".format( - sample.errmsg)) - - if self._stopped_consumers < len(self._consumers): - self._inq.put(sample) - else: - raise ValueError("all consumers exited, no more samples") - else: - self._consumed += 1 - return sample - - def reset(self): - """ reset for a new epoch of samples - """ - if self._epoch < 0: - self._epoch = 0 - for p in self._consumers: - p.start() - self._producer.start() - else: - if not self.drained(): - logger.warn("do not reset before epoch[%d] finishes".format( - self._epoch)) - self._produced = self._produced - self._consumed - else: - self._produced = 0 - - self._epoch += 1 - - assert self._stopped_consumers == 0, "some consumers already exited," \ - + " cannot start another epoch" - - self._source.reset() - self._consumed = 0 - self._feeding_ev.set() - - -# FIXME(dengkaipeng): fix me if you have better 
impliment -# handle terminate reader process, do not print stack frame -def _reader_exit(signum, frame): - logger.debug("Reader process exit.") - sys.exit() - - -signal.signal(signal.SIGTERM, _reader_exit) diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/post_map.py b/PaddleCV/PaddleDetection/ppdet/data/transform/post_map.py deleted file mode 100644 index d556160e5a485753fe2d68600d320fcda6c91496..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/post_map.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import cv2 -import numpy as np - -logger = logging.getLogger(__name__) - - -def build_post_map(coarsest_stride=1, - is_padding=False, - random_shapes=[], - multi_scales=[], - use_padded_im_info=False, - enable_multiscale_test=False, - num_scale=1): - """ - Build a mapper for post-processing batches - - Args: - config (dict of parameters): - { - coarsest_stride (int): stride of the coarsest FPN level - is_padding (bool): whether to padding in minibatch - random_shapes (list of int): resize to image to random shapes, - [] for not resize. - multi_scales (list of int): resize image by random scales, - [] for not resize. 
- use_padded_im_info (bool): whether to update im_info after padding - enable_multiscale_test (bool): whether to use multiscale test. - num_scale (int) : the number of scales for multiscale test. - } - Returns: - a mapper function which accept one argument 'batch' and - return the processed result - """ - - def padding_minibatch(batch_data): - if len(batch_data) == 1 and coarsest_stride == 1: - return batch_data - max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 1: - max_shape[1] = int( - np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int( - np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride) - padding_batch = [] - for data in batch_data: - im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros( - (im_c, max_shape[1], max_shape[2]), dtype=np.float32) - padding_im[:, :im_h, :im_w] = data[0] - if use_padded_im_info: - data[1][:2] = max_shape[1:3] - padding_batch.append((padding_im, ) + data[1:]) - return padding_batch - - def padding_multiscale_test(batch_data): - if len(batch_data) != 1: - raise NotImplementedError( - "Batch size must be 1 when using multiscale test, but now batch size is {}". - format(len(batch_data))) - if coarsest_stride > 1: - padding_batch = [] - padding_images = [] - data = batch_data[0] - for i, input in enumerate(data): - if i < num_scale: - im_c, im_h, im_w = input.shape - max_h = int( - np.ceil(im_h / coarsest_stride) * coarsest_stride) - max_w = int( - np.ceil(im_w / coarsest_stride) * coarsest_stride) - padding_im = np.zeros( - (im_c, max_h, max_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = input - data[num_scale][3 * i:3 * i + 2] = [max_h, max_w] - padding_batch.append(padding_im) - else: - padding_batch.append(input) - return [tuple(padding_batch)] - # no need to padding - return batch_data - - def random_shape(batch_data): - # For YOLO: gt_bbox is normalized, is scale invariant. 
- shape = np.random.choice(random_shapes) - scaled_batch = [] - h, w = batch_data[0][0].shape[1:3] - scale_x = float(shape) / w - scale_y = float(shape) / h - for data in batch_data: - im = cv2.resize( - data[0].transpose((1, 2, 0)), - None, - None, - fx=scale_x, - fy=scale_y, - interpolation=cv2.INTER_NEAREST) - scaled_batch.append((im.transpose(2, 0, 1), ) + data[1:]) - return scaled_batch - - def multi_scale_resize(batch_data): - # For RCNN: image shape in record in im_info. - scale = np.random.choice(multi_scales) - scaled_batch = [] - for data in batch_data: - im = cv2.resize( - data[0].transpose((1, 2, 0)), - None, - None, - fx=scale, - fy=scale, - interpolation=cv2.INTER_NEAREST) - im_info = [im.shape[:2], scale] - scaled_batch.append((im.transpose(2, 0, 1), im_info) + data[2:]) - return scaled_batch - - def _mapper(batch_data): - try: - if is_padding: - batch_data = padding_minibatch(batch_data) - if len(random_shapes) > 0: - batch_data = random_shape(batch_data) - if len(multi_scales) > 0: - batch_data = multi_scale_resize(batch_data) - if enable_multiscale_test: - batch_data = padding_multiscale_test(batch_data) - except Exception as e: - errmsg = "post-process failed with error: " + str(e) - logger.warn(errmsg) - raise e - - return batch_data - - return _mapper diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/__init__.py b/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/__init__.py deleted file mode 100644 index f118eb76a5f8349162a3a5dfc41bed2bb26a2cb0..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -__all__ = ['SharedBuffer', 'SharedMemoryMgr', 'SharedQueue'] - -from .sharedmemory import SharedBuffer -from .sharedmemory import SharedMemoryMgr -from .sharedmemory import SharedMemoryError -from .queue import SharedQueue diff --git a/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/queue.py b/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/queue.py deleted file mode 100644 index 0bd44d3e9e0b34cefa3adfd8008a90e0963c5b62..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/data/transform/shared_queue/queue.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
class SharedQueue(Queue):
    """ a Queue based on shared memory to communicate data between Process,
        and it's interface is compatible with 'multiprocessing.queues.Queue'

    Objects are pickled into a buffer obtained from the memory manager and
    only the buffer handle is sent through the underlying queue.
    """

    def __init__(self, maxsize=0, mem_mgr=None, memsize=None, pagesize=None):
        """ init

        Args:
            maxsize (int): forwarded to the underlying Queue
            mem_mgr (object): memory manager to allocate buffers from; when
                None a new SharedMemoryMgr is created
            memsize (int): capacity passed to the new SharedMemoryMgr
            pagesize (int): page size passed to the new SharedMemoryMgr
        """
        if six.PY3:
            super(SharedQueue, self).__init__(maxsize, ctx=mp.get_context())
        else:
            super(SharedQueue, self).__init__(maxsize)

        if mem_mgr is not None:
            self._shared_mem = mem_mgr
        else:
            self._shared_mem = SharedMemoryMgr(
                capacity=memsize, pagesize=pagesize)

    def put(self, obj, **kwargs):
        """ put an object to this queue

        The buffer is freed again when anything fails, then the error is
        logged and re-raised.
        """
        obj = pickle.dumps(obj, -1)
        buff = None
        try:
            buff = self._shared_mem.malloc(len(obj))
            buff.put(obj)
            super(SharedQueue, self).put(buff, **kwargs)
        except Exception:
            stack_info = traceback.format_exc()
            err_msg = 'failed to put a element to SharedQueue '\
                'with stack info[%s]' % (stack_info)
            logger.warning(err_msg)

            if buff is not None:
                buff.free()
            # bare raise keeps the original traceback
            raise

    def get(self, **kwargs):
        """ get an object from this queue

        The buffer received from the queue is always freed, whether or not
        unpickling succeeds.
        """
        buff = None
        try:
            buff = super(SharedQueue, self).get(**kwargs)
            data = buff.get()
            return pickle.load(StringIO(data))
        except Exception:
            stack_info = traceback.format_exc()
            err_msg = 'failed to get element from SharedQueue '\
                'with stack info[%s]' % (stack_info)
            logger.warning(err_msg)
            raise
        finally:
            if buff is not None:
                buff.free()

    def release(self):
        """ drop the reference to the memory manager

        Safe to call more than once. The manager's own 'release' is invoked
        only when the manager actually provides a callable with that name.
        """
        mem_mgr, self._shared_mem = self._shared_mem, None
        if mem_mgr is None:
            # already released
            return

        release = getattr(mem_mgr, 'release', None)
        if callable(release):
            release()
def memcopy(dst, src, offset=0, length=None):
    """ copy data from 'src' to 'dst' in bytes

    Args:
        dst (np.ndarray): destination array, overwritten in full
        src (np.ndarray|bytes|str): source data; text is encoded with the
            default codec before it is copied
        offset (int): index of the first source byte to copy
        length (int): number of bytes to copy; defaults to everything from
            'offset' to the end of the (encoded) source

    Returns:
        None
    """
    assert isinstance(dst, np.ndarray), 'invalid type for "dst" in memcopy'
    if not isinstance(src, np.ndarray):
        # Only on Python 3 is 'str' text rather than bytes; it has to be
        # encoded before numpy can view it as a buffer.
        if isinstance(src, str) and not isinstance(src, bytes):
            src = src.encode()
        src = np.frombuffer(src, dtype='uint8', count=len(src))

    # Measure the source after encoding, otherwise multi-byte characters
    # make the default length too short.
    if length is None:
        length = len(src) - offset

    dst[:] = src[offset:offset + length]
def get(self, offset=0, size=None, no_copy=True):
    """ read back the data stored in this buffer

    Args:
        offset (int): start position; a negative value counts back from
            the number of bytes currently stored
        size (int): number of bytes to read, the stored size when None
        no_copy (bool): forwarded unchanged to the owner's 'get_data'

    Returns:
        whatever the owner's 'get_data' returns for the requested range
        None: if no data stored in
    """
    if offset < 0:
        offset += self._size

    if self._size <= 0:
        return None

    if size is None:
        size = self._size

    assert offset + size <= self._cap, 'invalid offset[%d] '\
        'or size[%d] for capacity[%d]' % (offset, size, self._cap)

    manager = self.owner()
    return manager.get_data(self, offset, size, no_copy=no_copy)
"SharedBuffer(owner:%s, pos:%d, size:%d, "\ - "capacity:%d, alloc_status:[%s], pid:%d)" \ - % (str(self._owner), self._pos, self._size, \ - self._cap, self._alloc_status, os.getpid()) - - def free(self): - """ free this buffer to it's owner - """ - if self._owner is not None: - self.owner().free(self) - self._owner = None - self._cap = 0 - self._pos = -1 - self._size = 0 - return True - else: - return False - - -class PageAllocator(object): - """ allocator used to malloc and free shared memory which - is split into pages - """ - s_allocator_header = 12 - - def __init__(self, base, total_pages, page_size): - """ init - """ - self._magic_num = 1234321000 + random.randint(100, 999) - self._base = base - self._total_pages = total_pages - self._page_size = page_size - - header_pages = int( - math.ceil((total_pages + self.s_allocator_header) / page_size)) - - self._header_pages = header_pages - self._free_pages = total_pages - header_pages - self._header_size = self._header_pages * page_size - self._reset() - - def _dump_alloc_info(self, fname): - hpages, tpages, pos, used = self.header() - - start = self.s_allocator_header - end = start + self._page_size * hpages - alloc_flags = self._base[start:end].tostring() - info = { - 'magic_num': self._magic_num, - 'header_pages': hpages, - 'total_pages': tpages, - 'pos': pos, - 'used': used - } - info['alloc_flags'] = alloc_flags - fname = fname + '.' 
def full(self):
    """ are all allocatable pages used

    Returns:
        bool: True when the 'used' counter in the header equals the total
        number of pages
    """
    _, total_pages, _, used_pages = self.header()
    # The 'used' counter already contains the header pages: '_reset'
    # initialises it to the header page count and 'empty' compares it with
    # that count. Adding the header pages again would count them twice.
    return used_pages == total_pages
def get_page_status(self, start, page_num, ret_flag=False):
    """ read the allocation flags of 'page_num' pages beginning at 'start'

    Args:
        start (int): index of the first page
        page_num (int): number of pages to inspect
        ret_flag (bool): return the raw flag string instead of a summary

    Returns:
        str: one '0'/'1' character per page, when 'ret_flag' is True
        tuple: (page_num, 1) when none of the pages is flagged '0',
            otherwise (number of '0' pages, 0)
    """
    # flags are stored right behind the fixed-size allocator header
    start += self.s_allocator_header
    end = start + page_num
    assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\
        'in allocator[%s]' % (end, str(self))
    # 'tobytes' replaces the deprecated 'tostring' alias of ndarray
    status = self._base[start:end].tobytes().decode()
    if ret_flag:
        return status

    zero_num = status.count('0')
    if zero_num == 0:
        return (page_num, 1)
    return (zero_num, 0)
def free_page(self, start, page_num):
    """ give 'page_num' pages beginning at 'start' back to the allocator

    The pages must all be flagged as used. They are then flagged '0' and
    the used-page counter in the header is lowered by 'page_num'; the
    allocation position is written back unchanged.
    """
    current = self.get_page_status(start, page_num)
    expected = (page_num, 1)
    assert current == expected, \
        'invalid status[%s] when free [%d, %d]' \
        % (str(current), start, page_num)

    self.set_page_status(start, page_num, '0')

    alloc_pos, used_pages = self.header()[2:]
    self.set_alloc_info(alloc_pos, used_pages - page_num)
def put_data(self, shared_buf, data):
    """ fill 'data' into 'shared_buf'

    Args:
        shared_buf (SharedBuffer): destination buffer; its '_pos' is the
            index of its first page
        data (bytes|str): payload; text is encoded with the default codec
            before it is copied

    Returns:
        None
    """
    # numpy cannot view Python 3 text as a buffer, so encode it first
    # (on Python 2 'str' already is bytes and this branch is skipped).
    if isinstance(data, str) and not isinstance(data, bytes):
        data = data.encode()

    assert len(data) <= shared_buf.capacity(), 'too large data[%d] '\
        'for this buffer[%s]' % (len(data), str(shared_buf))
    start = shared_buf._pos * self._page_size
    end = start + len(data)
    assert start >= 0 and end <= self._cap, "invalid start "\
        "position[%d] when put data to buff:%s" % (start, str(shared_buf))
    self._base[start:end] = np.frombuffer(data, 'uint8', len(data))
def next(self):
    """Collect up to 'self._batchsz' samples from 'self._ds' into a list.

    When 'self._drop_empty' is set, samples that contain None, an empty
    numpy array or an empty sequence are skipped.

    Returns:
        list: the collected samples.

    Raises:
        StopIteration: the wrapped dataset ran out and either nothing was
            collected or 'self._drop_last' is set.
    """
    # The ABCs moved to 'collections.abc'; the old 'collections.Sequence'
    # alias no longer exists on Python 3.10+.
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2
        from collections import Sequence

    def empty(x):
        if isinstance(x, np.ndarray) and x.size == 0:
            return True
        return isinstance(x, Sequence) and len(x) == 0

    def has_empty(items):
        return any(x is None or empty(x) for x in items)

    batch = []
    for _ in range(self._batchsz):
        try:
            out = self._ds.next()
            while self._drop_empty and has_empty(out):
                out = self._ds.next()
            batch.append(out)
        except StopIteration:
            # dataset exhausted: hand out the partial batch if allowed
            if not self._drop_last and len(batch) > 0:
                return batch
            else:
                raise StopIteration
    return batch
class LossScale(object):
    """
    Base class of the loss scale managers.

    Subclasses are expected to set `self.scale` and to override
    `increment` and `decrement`.
    """

    def __init__(self):
        super(LossScale, self).__init__()

    def get_loss_scale_var(self):
        """Return the `scale` attribute of this manager."""
        scale_var = self.scale
        return scale_var

    def increment(self):
        """Not implemented in the base class."""
        raise NotImplementedError

    def decrement(self):
        """Not implemented in the base class."""
        raise NotImplementedError
def increment(self):
    """Count one more good step, or raise the loss scale once enough passed.

    When `increment_every` is less than `good_steps + 1`, the scale is
    multiplied by `factor`. A finite result is stored and `good_steps` is
    reset to zero; otherwise the scale is left alone and `good_steps` is
    incremented. In all other cases `good_steps` is simply incremented.
    """
    enough_steps = layers.less_than(self.increment_every,
                                    self.good_steps + 1)
    with layers.Switch() as switch:
        with switch.case(enough_steps):
            new_scale = self.scale * self.factor
            # do not store a scale that overflowed
            scale_valid = layers.isfinite(new_scale)
            with layers.Switch() as switch2:
                with switch2.case(scale_valid):
                    layers.assign(new_scale, self.scale)
                    # restart the count of good steps
                    layers.assign(layers.zeros_like(self.good_steps),
                                  self.good_steps)
                with switch2.default():
                    layers.increment(self.good_steps)
        with switch.default():
            layers.increment(self.good_steps)
class mixed_precision_context(object):
    """
    Context manager for mixed precision training.

    Args:
        loss_scale (float, str or obj): loss scale settings, can be:
            1. an number: use fixed loss scale.
            2. 'dynamic': use a default `DynamicLossScale`.
            3. `DynamicLossScale` or `StaticLossScale` instance.
        enabled (bool): enable mixed precision training.

    Examples:

        .. code-block:: python

            from paddle import fluid
            from ppdet.experimental import mixed_precision_context

            with mixed_precision_context('dynamic', True) as ctx:
                # cast inputs to float16
                inputs = fluid.layers.cast(inputs, "float16")
                # build model here
                logits = model(inputs)
                # use float32 for softmax
                logits = fluid.layers.cast(logits, "float32")
                softmax = fluid.layers.softmax(logits)
                loss = fluid.layers.cross_entropy(input=softmax, label=label)
                avg_loss = fluid.layers.mean(loss)
                # scale loss
                loss_scale = ctx.get_loss_scale_var()
                avg_loss *= loss_scale
                optimizer = fluid.optimizer.Momentum(...)
                optimizer.minimize(avg_loss)

    """

    def __init__(self, loss_scale=1., enabled=True):
        super(mixed_precision_context, self).__init__()
        self.enabled = enabled
        # Defined up front so that reading it never reaches __getattr__,
        # also when the context is disabled.
        self.loss_scale = None
        if not enabled:
            return
        monkey_patch()
        if isinstance(loss_scale, six.integer_types + (float,)):
            self.loss_scale = StaticLossScale(loss_scale)
        elif loss_scale == 'dynamic':
            self.loss_scale = DynamicLossScale()
        else:
            assert isinstance(loss_scale, LossScale), \
                "Invalid loss scale argument"
            self.loss_scale = loss_scale

    @property
    def dynamic_scaling(self):
        """Whether the active loss scale is a `DynamicLossScale`."""
        return isinstance(self.loss_scale, DynamicLossScale)

    def __getattr__(self, attr):
        """Forward the loss scale API to `self.loss_scale`.

        Only called for names that normal lookup did not find. Anything
        other than the three forwarded names is a genuine miss and is
        reported as such instead of silently evaluating to None.
        """
        if attr in ['get_loss_scale_var', 'increment', 'decrement']:
            return getattr(self.loss_scale, attr)
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (type(self).__name__, attr))

    def __enter__(self):
        """Install this context as the global state; no-op when disabled."""
        if not self.enabled:
            return
        global _mixed_precision_global_state
        _mixed_precision_global_state = self
        return mixed_precision_global_state()

    def __exit__(self, *args):
        """Clear the global state again; no-op when disabled."""
        if not self.enabled:
            return
        global _mixed_precision_global_state
        _mixed_precision_global_state = None
        return mixed_precision_global_state()
def update_loss_scale(grads):
    """Adjust the dynamic loss scale according to the given gradients.

    Does nothing and returns None when there is no active mixed precision
    state or the state does not use dynamic scaling.

    Args:
        grads: iterable of gradient variables.

    Returns:
        The result of `layers.isfinite` over the stacked per-gradient sums,
        or None in the no-op case.
    """
    state = mixed_precision_global_state()
    if state is None or not state.dynamic_scaling:
        return
    # one sum per gradient, stacked so a single isfinite covers them all
    per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
    grad_valid = layers.isfinite(per_grad_check)

    with layers.Switch() as switch:
        with switch.case(grad_valid):
            state.increment()
        with switch.default():
            state.decrement()
    return grad_valid
def monkey_patch():
    """Install the mixed precision hooks, at most once per process.

    `LayerHelperBase.create_parameter` and `Optimizer.backward` are
    replaced by the functions of this module; the previous implementations
    stay reachable as `_create_parameter` and `_backward`.
    """
    global mixed_precision_patched
    if not mixed_precision_patched:
        helper_cls = lhb.LayerHelperBase
        original_create = helper_cls.create_parameter
        helper_cls.create_parameter = create_parameter
        helper_cls._create_parameter = original_create

        optimizer_cls = optim.Optimizer
        original_backward = optimizer_cls.backward
        optimizer_cls.backward = backward
        optimizer_cls._backward = original_backward

        mixed_precision_patched = True
- -from __future__ import absolute_import - -# XXX for triggering decorators -from . import anchor_heads -from . import architectures -from . import backbones -from . import roi_extractors -from . import roi_heads -from . import ops -from . import target_assigners - -from .anchor_heads import * -from .architectures import * -from .backbones import * -from .roi_extractors import * -from .roi_heads import * -from .ops import * -from .target_assigners import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/__init__.py deleted file mode 100644 index 1ed22160d23cc5c7437507350037e71134ee8824..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -from . import rpn_head -from . import yolo_head -from . 
import retina_head - -from .rpn_head import * -from .yolo_head import * -from .retina_head import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/retina_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/retina_head.py deleted file mode 100644 index 41246e8b6267881f62386653841ecfb525a254e1..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/retina_head.py +++ /dev/null @@ -1,407 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Constant -from paddle.fluid.regularizer import L2Decay -from ppdet.modeling.ops import (AnchorGenerator, RetinaTargetAssign, - RetinaOutputDecoder) - -from ppdet.core.workspace import register - -__all__ = ['RetinaHead'] - - -@register -class RetinaHead(object): - """ - Retina Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - target_assign (object): `RetinaTargetAssign` instance - output_decoder (object): `RetinaOutputDecoder` instance - num_convs_per_octave (int): Number of convolution layers in each octave - num_chan (int): Number of octave output channels - max_level (int): Highest level of FPN output - min_level (int): Lowest level of FPN output - prior_prob (float): Used to set the bias init for the class prediction layer - base_scale (int): Anchors are generated based on this scale - num_scales_per_octave (int): Number of anchor scales per octave - num_classes (int): Number of classes - gamma (float): The parameter in focal loss - alpha (float): The parameter in focal loss - sigma (float): The parameter in smooth l1 loss - """ - __inject__ = ['anchor_generator', 'target_assign', 'output_decoder'] - __shared__ = ['num_classes'] - - def __init__(self, - anchor_generator=AnchorGenerator().__dict__, - target_assign=RetinaTargetAssign().__dict__, - output_decoder=RetinaOutputDecoder().__dict__, - num_convs_per_octave=4, - num_chan=256, - max_level=7, - min_level=3, - prior_prob=0.01, - base_scale=4, - num_scales_per_octave=3, - num_classes=81, - gamma=2.0, - alpha=0.25, - sigma=3.0151134457776365): - self.anchor_generator = anchor_generator - self.target_assign = target_assign - self.output_decoder = output_decoder - self.num_convs_per_octave = num_convs_per_octave - self.num_chan = 
num_chan - self.max_level = max_level - self.min_level = min_level - self.prior_prob = prior_prob - self.base_scale = base_scale - self.num_scales_per_octave = num_scales_per_octave - self.num_classes = num_classes - self.gamma = gamma - self.alpha = alpha - self.sigma = sigma - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGenerator(**anchor_generator) - if isinstance(target_assign, dict): - self.target_assign = RetinaTargetAssign(**target_assign) - if isinstance(output_decoder, dict): - self.output_decoder = RetinaOutputDecoder(**output_decoder) - - def _class_subnet(self, body_feats, spatial_scale): - """ - Get class predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - cls_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_cls_conv_n{}_fpn{}'.format( - i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # class prediction - cls_name = 'retnet_cls_pred_fpn{}'.format(lvl) - cls_share_name = 'retnet_cls_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len( - 
self.anchor_generator.aspect_ratios) - cls_dim = num_anchors * (self.num_classes - 1) - # bias initialization: b = -log((1 - pai) / pai) - bias_init = float(-np.log((1 - self.prior_prob) / self.prior_prob)) - out_cls = fluid.layers.conv2d( - input=subnet_blob, - num_filters=cls_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - initializer=Constant(value=bias_init), - learning_rate=2., - regularizer=L2Decay(0.))) - cls_pred_list.append(out_cls) - - return cls_pred_list - - def _bbox_subnet(self, body_feats, spatial_scale): - """ - Get bounding box predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - bbox_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_bbox_conv_n{}_fpn{}'.format( - i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # bbox prediction - bbox_name = 'retnet_bbox_pred_fpn{}'.format(lvl) - bbox_share_name = 'retnet_bbox_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len( - self.anchor_generator.aspect_ratios) - bbox_dim = num_anchors * 4 - out_bbox = fluid.layers.conv2d( - input=subnet_blob, - num_filters=bbox_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - bbox_pred_list.append(out_bbox) - return bbox_pred_list - - def _anchor_generate(self, body_feats, spatial_scale): - """ - Get anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Return: - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with - shape. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - anchor_list = [] - anchor_var_list = [] - for lvl in range(self.min_level, self.max_level + 1): - anchor_sizes = [] - stride = int(1 / spatial_scale[self.max_level - lvl]) - for octave in range(self.num_scales_per_octave): - anchor_size = stride * ( - 2**(float(octave) / - float(self.num_scales_per_octave))) * self.base_scale - anchor_sizes.append(anchor_size) - fpn_name = fpn_name_list[self.max_level - lvl] - anchor, anchor_var = self.anchor_generator( - input=body_feats[fpn_name], - anchor_sizes=anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - stride=[stride, stride]) - anchor_list.append(anchor) - anchor_var_list.append(anchor_var) - return anchor_list, anchor_var_list - - def _get_output(self, body_feats, spatial_scale): - """ - Get class, bounding box predictions and anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with - shape. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - # class subnet - cls_pred_list = self._class_subnet(body_feats, spatial_scale) - # bbox subnet - bbox_pred_list = self._bbox_subnet(body_feats, spatial_scale) - #generate anchors - anchor_list, anchor_var_list = self._anchor_generate(body_feats, - spatial_scale) - cls_pred_reshape_list = [] - bbox_pred_reshape_list = [] - anchor_reshape_list = [] - anchor_var_reshape_list = [] - for i in range(self.max_level - self.min_level + 1): - cls_pred_transpose = fluid.layers.transpose( - cls_pred_list[i], perm=[0, 2, 3, 1]) - cls_pred_reshape = fluid.layers.reshape( - cls_pred_transpose, shape=(0, -1, self.num_classes - 1)) - bbox_pred_transpose = fluid.layers.transpose( - bbox_pred_list[i], perm=[0, 2, 3, 1]) - bbox_pred_reshape = fluid.layers.reshape( - bbox_pred_transpose, shape=(0, -1, 4)) - anchor_reshape = fluid.layers.reshape(anchor_list[i], shape=(-1, 4)) - anchor_var_reshape = fluid.layers.reshape( - anchor_var_list[i], shape=(-1, 4)) - cls_pred_reshape_list.append(cls_pred_reshape) - bbox_pred_reshape_list.append(bbox_pred_reshape) - anchor_reshape_list.append(anchor_reshape) - anchor_var_reshape_list.append(anchor_var_reshape) - output = {} - output['cls_pred'] = cls_pred_reshape_list - output['bbox_pred'] = bbox_pred_reshape_list - output['anchor'] = anchor_reshape_list - output['anchor_var'] = anchor_var_reshape_list - return output - - def get_prediction(self, body_feats, spatial_scale, im_info): - """ - Get prediction bounding box in test stage. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. 
Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. - """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - for i in range(self.max_level - self.min_level + 1): - cls_pred_reshape_list[i] = fluid.layers.sigmoid( - cls_pred_reshape_list[i]) - pred_result = self.output_decoder( - bboxes=bbox_pred_reshape_list, - scores=cls_pred_reshape_list, - anchors=anchor_reshape_list, - im_info=im_info) - return {'bbox': pred_result} - - def get_loss(self, body_feats, spatial_scale, im_info, gt_box, gt_label, - is_crowd): - """ - Calculate the loss of retinanet. - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - gt_label(Variable): The ground-truth labels with shape [M, 1]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - loss_cls(Variable): focal loss. - loss_bbox(Variable): smooth l1 loss. 
- """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - anchor_var_reshape_list = output['anchor_var'] - - cls_pred_input = fluid.layers.concat(cls_pred_reshape_list, axis=1) - bbox_pred_input = fluid.layers.concat(bbox_pred_reshape_list, axis=1) - anchor_input = fluid.layers.concat(anchor_reshape_list, axis=0) - anchor_var_input = fluid.layers.concat(anchor_var_reshape_list, axis=0) - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight, fg_num = \ - self.target_assign( - bbox_pred=bbox_pred_input, - cls_logits=cls_pred_input, - anchor_box=anchor_input, - anchor_var=anchor_var_input, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - im_info=im_info, - num_classes=self.num_classes - 1) - fg_num = fluid.layers.reduce_sum(fg_num, name='fg_num') - loss_cls = fluid.layers.sigmoid_focal_loss( - x=score_pred, - label=score_tgt, - fg_num=fg_num, - gamma=self.gamma, - alpha=self.alpha) - loss_cls = fluid.layers.reduce_sum(loss_cls, name='loss_cls') - loss_bbox = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=self.sigma, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - loss_bbox = fluid.layers.reduce_sum(loss_bbox, name='loss_bbox') - loss_bbox = loss_bbox / fg_num - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/rpn_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/rpn_head.py deleted file mode 100644 index 876aafe36553b31cb1b41fec402949eb5a4c9f4b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/rpn_head.py +++ /dev/null @@ -1,497 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -from ppdet.core.workspace import register -from ppdet.modeling.ops import (AnchorGenerator, RPNTargetAssign, - GenerateProposals) - -__all__ = ['RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead'] - - -@register -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator=AnchorGenerator().__dict__, - rpn_target_assign=RPNTargetAssign().__dict__, - train_proposal=GenerateProposals(12000, 2000).__dict__, - test_proposal=GenerateProposals().__dict__, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGenerator(**anchor_generator) - if isinstance(rpn_target_assign, dict): - 
self.rpn_target_assign = RPNTargetAssign(**rpn_target_assign) - if isinstance(train_proposal, dict): - self.train_proposal = GenerateProposals(**train_proposal) - if isinstance(test_proposal, dict): - self.test_proposal = GenerateProposals(**test_proposal) - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. - """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors - self.anchor, self.anchor_var = self.anchor_generator(input=rpn_conv) - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - 
""" - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. - body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], - ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - rpn_rois, rpn_roi_probs = prop_op( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - 
x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. 
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - - return {'loss_rpn_cls': rpn_cls_loss, 'loss_rpn_bbox': rpn_reg_loss} - - -@register -class FPNRPNHead(RPNHead): - """ - RPN Head that supports FPN input - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` 
instance for training - test_proposal (object): `GenerateProposals` instance for testing - anchor_start_size (int): size of anchor at the first scale - num_chan (int): number of FPN output channels - min_level (int): lowest level of FPN output - max_level (int): highest level of FPN output - num_classes (int): number of classes in rpn output - """ - - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator=AnchorGenerator().__dict__, - rpn_target_assign=RPNTargetAssign().__dict__, - train_proposal=GenerateProposals(12000, 2000).__dict__, - test_proposal=GenerateProposals().__dict__, - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6, - num_classes=1): - super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, - train_proposal, test_proposal) - self.anchor_start_size = anchor_start_size - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.num_classes = num_classes - - self.fpn_rpn_list = [] - self.anchors_list = [] - self.anchor_var_list = [] - - def _get_output(self, input, feat_lvl): - """ - Get anchor and FPN RPN head output at one level. - - Args: - input(Variable): Body feature from backbone. - feat_lvl(int): Indicate the level of rpn output corresponding - to the level of feature map. - - Return: - rpn_cls_score(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors * 4, H, W]. 
- """ - slvl = str(feat_lvl) - conv_name = 'conv_rpn_fpn' + slvl - cls_name = 'rpn_cls_logits_fpn' + slvl - bbox_name = 'rpn_bbox_pred_fpn' + slvl - conv_share_name = 'conv_rpn_fpn' + str(self.min_level) - cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) - bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) - - num_anchors = len(self.anchor_generator.aspect_ratios) - conv_rpn_fpn = fluid.layers.conv2d( - input=input, - num_filters=self.num_chan, - filter_size=3, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - self.anchors, self.anchor_var = self.anchor_generator( - input=conv_rpn_fpn, - anchor_sizes=(self.anchor_start_size * 2. - **(feat_lvl - self.min_level), ), - stride=(2.**feat_lvl, 2.**feat_lvl)) - - cls_num_filters = num_anchors * self.num_classes - self.rpn_cls_score = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=cls_num_filters, - filter_size=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.rpn_bbox_pred = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=num_anchors * 4, - filter_size=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): - """ - Get proposals in one level according to the output of fpn rpn head - - Args: - body_feat(Variable): the feature map from backone. 
- im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - feat_lvl(int): Indicate the level of proposals corresponding to - the feature maps. - - Returns: - rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). - rpn_roi_probs_fpn(Variable): Scores of proposals with - shape of (rois_num, 1). - """ - - rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, - feat_lvl) - - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - if self.num_classes == 1: - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) - else: - rpn_cls_score_fpn = fluid.layers.transpose( - rpn_cls_score_fpn, perm=[0, 2, 3, 1]) - rpn_cls_score_fpn = fluid.layers.reshape( - rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_fpn = fluid.layers.softmax( - rpn_cls_score_fpn, - use_cudnn=False, - name='rpn_cls_prob_fpn' + str(feat_lvl)) - rpn_cls_prob_fpn = fluid.layers.slice( - rpn_cls_prob_fpn, axes=[4], starts=[1], - ends=[self.num_classes]) - rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) - rpn_cls_prob_fpn = fluid.layers.reshape( - rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) - rpn_cls_prob_fpn = fluid.layers.transpose( - rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) - rpn_rois_fpn, rpn_roi_prob_fpn = prop_op( - scores=rpn_cls_prob_fpn, - bbox_deltas=rpn_bbox_pred_fpn, - im_info=im_info, - anchors=self.anchors, - variances=self.anchor_var) - return rpn_rois_fpn, rpn_roi_prob_fpn - - def get_proposals(self, fpn_feats, im_info, mode='train'): - """ - Get proposals in multiple levels according to the output of fpn - rpn head - - Args: - fpn_feats(dict): A dictionary represents the output feature map - of FPN with their name. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). 
- - Return: - rois_list(Variable): Output proposals in shape of [rois_num, 4] - """ - rois_list = [] - roi_probs_list = [] - fpn_feat_names = list(fpn_feats.keys()) - for lvl in range(self.min_level, self.max_level + 1): - fpn_feat_name = fpn_feat_names[self.max_level - lvl] - fpn_feat = fpn_feats[fpn_feat_name] - rois_fpn, roi_probs_fpn = self._get_single_proposals( - fpn_feat, im_info, lvl, mode) - self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) - rois_list.append(rois_fpn) - roi_probs_list.append(roi_probs_fpn) - self.anchors_list.append(self.anchors) - self.anchor_var_list.append(self.anchor_var) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - post_nms_top_n = prop_op.post_nms_top_n - rois_collect = fluid.layers.collect_fpn_proposals( - rois_list, - roi_probs_list, - self.min_level, - self.max_level, - post_nms_top_n, - name='collect') - return rois_collect - - def _get_loss_input(self): - rpn_clses = [] - rpn_bboxes = [] - anchors = [] - anchor_vars = [] - for i in range(len(self.fpn_rpn_list)): - single_input = self._transform_input( - self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], - self.anchors_list[i], self.anchor_var_list[i]) - rpn_clses.append(single_input[0]) - rpn_bboxes.append(single_input[1]) - anchors.append(single_input[2]) - anchor_vars.append(single_input[3]) - - rpn_cls = fluid.layers.concat(rpn_clses, axis=1) - rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) - anchors = fluid.layers.concat(anchors) - anchor_var = fluid.layers.concat(anchor_vars) - return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/yolo_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/yolo_head.py deleted file mode 100644 index 7e756f267762827b3666e8143dce9a695fc526e2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/anchor_heads/yolo_head.py +++ /dev/null @@ -1,309 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle 
Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -from ppdet.modeling.ops import MultiClassNMS -from ppdet.core.workspace import register - -__all__ = ['YOLOv3Head'] - - -@register -class YOLOv3Head(object): - """ - Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - __inject__ = ['nms'] - __shared__ = ['num_classes', 'weight_prefix_name'] - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - score_threshold=0.01, - nms_top_k=1000, - keep_top_k=100, - nms_threshold=0.45, - background_label=-1).__dict__, - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = 
label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - if isinstance(nms, dict): - self.nms = MultiClassNMS(**nms) - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - 
def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - blocks = input[-1:-out_layer_num - 1:-1] - - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - 
filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs - - def get_loss(self, input, gt_box, gt_label, gt_score): - """ - Get final loss of network of YOLOv3. - - Args: - input (list): List of Variables, output of backbone stages - gt_box (Variable): The ground-truth boudding boxes. - gt_label (Variable): The ground-truth class labels. - gt_score (Variable): The ground-truth boudding boxes mixup scores. - - Returns: - loss (Variable): The loss Variable of YOLOv3 network. - - """ - outputs = self._get_outputs(input, is_train=True) - - losses = [] - downsample = 32 - for i, output in enumerate(outputs): - anchor_mask = self.anchor_masks[i] - loss = fluid.layers.yolov3_loss( - x=output, - gt_box=gt_box, - gt_label=gt_label, - gt_score=gt_score, - anchors=self.anchors, - anchor_mask=anchor_mask, - class_num=self.num_classes, - ignore_thresh=self.ignore_thresh, - downsample_ratio=downsample, - use_label_smooth=self.label_smooth, - name=self.prefix_name + "yolo_loss" + str(i)) - losses.append(fluid.layers.reduce_mean(loss)) - downsample //= 2 - - return sum(losses) - - def get_prediction(self, input, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - input (list): List of Variables, output of backbone stages - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - - outputs = self._get_outputs(input, is_train=False) - - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = self.nms(bboxes=yolo_boxes, scores=yolo_scores) - return {'bbox': pred} diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/__init__.py deleted file mode 100644 index 6d2f8aef6172f37e1389a573aa12c4bcc4f71b6c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -from . import faster_rcnn -from . import mask_rcnn -from . import cascade_rcnn -from . import cascade_mask_rcnn -from . import yolov3 -from . import ssd -from . import retinanet -from . import blazeface -from . 
import faceboxes - -from .faster_rcnn import * -from .mask_rcnn import * -from .cascade_rcnn import * -from .cascade_mask_rcnn import * -from .yolov3 import * -from .ssd import * -from .retinanet import * -from .blazeface import * -from .faceboxes import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/blazeface.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/blazeface.py deleted file mode 100644 index cc9a2bb338fd1f3b40be09b7e351c24df06651a4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/blazeface.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from paddle import fluid - -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -from ppdet.core.workspace import register -from ppdet.modeling.ops import SSDOutputDecoder - -__all__ = ['BlazeFace'] - - -@register -class BlazeFace(object): - """ - BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs, - see https://arxiv.org/abs/1907.05047 - - Args: - backbone (object): backbone instance - output_decoder (object): `SSDOutputDecoder` instance - min_sizes (list|None): min sizes of generated prior boxes. 
- max_sizes (list|None): max sizes of generated prior boxes. Default: None. - num_classes (int): number of output classes - use_density_prior_box (bool): whether or not use density_prior_box - instead of prior_box - densities (list|None): the densities of generated density prior boxes, - this attribute should be a list or tuple of integers - """ - - __category__ = 'architecture' - __inject__ = ['backbone', 'output_decoder'] - __shared__ = ['num_classes'] - - def __init__(self, - backbone="BlazeNet", - output_decoder=SSDOutputDecoder().__dict__, - min_sizes=[[16., 24.], [32., 48., 64., 80., 96., 128.]], - max_sizes=None, - steps=[8., 16.], - num_classes=2, - use_density_prior_box=False, - densities=[[2, 2], [2, 1, 1, 1, 1, 1]]): - super(BlazeFace, self).__init__() - self.backbone = backbone - self.num_classes = num_classes - self.output_decoder = output_decoder - if isinstance(output_decoder, dict): - self.output_decoder = SSDOutputDecoder(**output_decoder) - self.min_sizes = min_sizes - self.max_sizes = max_sizes - self.steps = steps - self.use_density_prior_box = use_density_prior_box - self.densities = densities - - def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - if mode == 'train': - gt_box = feed_vars['gt_box'] - gt_label = feed_vars['gt_label'] - - body_feats = self.backbone(im) - locs, confs, box, box_var = self._multi_box_head( - inputs=body_feats, - image=im, - num_classes=self.num_classes, - use_density_prior_box=self.use_density_prior_box) - - if mode == 'train': - loss = fluid.layers.ssd_loss( - locs, - confs, - gt_box, - gt_label, - box, - box_var, - overlap_threshold=0.35, - neg_overlap=0.35) - loss = fluid.layers.reduce_sum(loss) - loss.persistable = True - return {'loss': loss} - else: - pred = self.output_decoder(locs, confs, box, box_var) - return {'bbox': pred} - - def _multi_box_head(self, - inputs, - image, - num_classes=2, - use_density_prior_box=False): - def permute_and_reshape(input, last_dim): - trans = 
fluid.layers.transpose(input, perm=[0, 2, 3, 1]) - compile_shape = [0, -1, last_dim] - return fluid.layers.reshape(trans, shape=compile_shape) - - def _is_list_or_tuple_(data): - return (isinstance(data, list) or isinstance(data, tuple)) - - locs, confs = [], [] - boxes, vars = [], [] - b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.)) - - for i, input in enumerate(inputs): - min_size = self.min_sizes[i] - - if use_density_prior_box: - densities = self.densities[i] - box, var = fluid.layers.density_prior_box( - input, - image, - densities=densities, - fixed_sizes=min_size, - fixed_ratios=[1.], - clip=False, - offset=0.5) - else: - box, var = fluid.layers.prior_box( - input, - image, - min_sizes=min_size, - max_sizes=None, - steps=[self.steps[i]] * 2, - aspect_ratios=[1.], - clip=False, - flip=False, - offset=0.5) - - num_boxes = box.shape[2] - - box = fluid.layers.reshape(box, shape=[-1, 4]) - var = fluid.layers.reshape(var, shape=[-1, 4]) - num_loc_output = num_boxes * 4 - num_conf_output = num_boxes * num_classes - # get loc - mbox_loc = fluid.layers.conv2d( - input, num_loc_output, 3, 1, 1, bias_attr=b_attr) - loc = permute_and_reshape(mbox_loc, 4) - # get conf - mbox_conf = fluid.layers.conv2d( - input, num_conf_output, 3, 1, 1, bias_attr=b_attr) - conf = permute_and_reshape(mbox_conf, 2) - - locs.append(loc) - confs.append(conf) - boxes.append(box) - vars.append(var) - - face_mbox_loc = fluid.layers.concat(locs, axis=1) - face_mbox_conf = fluid.layers.concat(confs, axis=1) - prior_boxes = fluid.layers.concat(boxes) - box_vars = fluid.layers.concat(vars) - return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars): - return self.build(feed_vars, 'eval') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') - - def is_bbox_normalized(self): - return True diff --git 
a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py deleted file mode 100644 index f77ee62759bd9fe7e2b97669e0bdd82d1bddd9ad..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['CascadeMaskRCNN'] - - -@register -class CascadeMaskRCNN(object): - """ - Cascade Mask R-CNN architecture, see https://arxiv.org/abs/1712.00726 - - Args: - backbone (object): backbone instance - rpn_head (object): `RPNhead` instance - bbox_assigner (object): `BBoxAssigner` instance - roi_extractor (object): ROI extractor instance - bbox_head (object): `BBoxHead` instance - mask_assigner (object): `MaskAssigner` instance - mask_head (object): `MaskHead` instance - fpn (object): feature pyramid network instance - """ - - __category__ = 'architecture' - __inject__ = [ - 'backbone', 'rpn_head', 'bbox_assigner', 'roi_extractor', 'bbox_head', - 'mask_assigner', 'mask_head', 'fpn' - ] - - def 
__init__(self, - backbone, - rpn_head, - roi_extractor='FPNRoIAlign', - bbox_head='CascadeBBoxHead', - bbox_assigner='CascadeBBoxAssigner', - mask_assigner='MaskAssigner', - mask_head='MaskHead', - rpn_only=False, - fpn='FPN'): - super(CascadeMaskRCNN, self).__init__() - assert fpn is not None, "cascade RCNN requires FPN" - self.backbone = backbone - self.fpn = fpn - self.rpn_head = rpn_head - self.bbox_assigner = bbox_assigner - self.roi_extractor = roi_extractor - self.bbox_head = bbox_head - self.mask_assigner = mask_assigner - self.mask_head = mask_head - self.rpn_only = rpn_only - # Cascade local cfg - self.cls_agnostic_bbox_reg = 2 - (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights - self.cascade_bbox_reg_weights = [ - [1. / brw0, 1. / brw0, 2. / brw0, 2. / brw0], - [1. / brw1, 1. / brw1, 2. / brw1, 2. / brw1], - [1. / brw2, 1. / brw2, 2. / brw2, 2. / brw2] - ] - self.cascade_rcnn_loss_weight = [1.0, 0.5, 0.25] - - def build(self, feed_vars, mode='train'): - if mode == 'train': - required_fields = [ - 'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info' - ] - else: - required_fields = ['im_shape', 'im_info'] - self._input_check(required_fields, feed_vars) - - im = feed_vars['image'] - if mode == 'train': - gt_box = feed_vars['gt_box'] - is_crowd = feed_vars['is_crowd'] - - im_info = feed_vars['im_info'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - # backbone - body_feats = self.backbone(im) - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32')) - for k, v in body_feats.items()) - - # FPN - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - # rpn proposals - rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) - - if mode == 'train': - rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) - 
else: - if self.rpn_only: - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois) - rois = rpn_rois / im_scale - return {'proposal': rois} - - proposal_list = [] - roi_feat_list = [] - rcnn_pred_list = [] - rcnn_target_list = [] - - proposals = None - bbox_pred = None - for i in range(3): - if i > 0: - refined_bbox = self._decode_box( - proposals, - bbox_pred, - curr_stage=i - 1, ) - else: - refined_bbox = rpn_rois - - if mode == 'train': - outs = self.bbox_assigner( - input_rois=refined_bbox, feed_vars=feed_vars, curr_stage=i) - - proposals = outs[0] - rcnn_target_list.append(outs) - else: - proposals = refined_bbox - proposal_list.append(proposals) - - # extract roi features - roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale) - roi_feat_list.append(roi_feat) - - # bbox head - cls_score, bbox_pred = self.bbox_head.get_output( - roi_feat, - wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i], - name='_' + str(i + 1) if i > 0 else '') - rcnn_pred_list.append((cls_score, bbox_pred)) - - # get mask rois - rois = proposal_list[2] - - if mode == 'train': - loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list, - self.cascade_rcnn_loss_weight) - loss.update(rpn_loss) - - labels_int32 = rcnn_target_list[2][1] - - mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner( - rois=rois, - gt_classes=feed_vars['gt_label'], - is_crowd=feed_vars['is_crowd'], - gt_segms=feed_vars['gt_mask'], - im_info=feed_vars['im_info'], - labels_int32=labels_int32) - - if self.fpn is None: - bbox_head_feat = self.bbox_head.get_head_feat() - feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32) - else: - feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) - mask_loss = self.mask_head.get_loss(feat, mask_int32) - loss.update(mask_loss) - - total_loss = fluid.layers.sum(list(loss.values())) - loss.update({'loss': total_loss}) - return loss - else: - 
mask_name = 'mask_pred' - mask_pred, bbox_pred = self.single_scale_eval( - body_feats, spatial_scale, im_info, mask_name, bbox_pred, - roi_feat_list, rcnn_pred_list, proposal_list, - feed_vars['im_shape']) - return {'bbox': bbox_pred, 'mask': mask_pred} - - def build_multi_scale(self, feed_vars, mask_branch=False): - required_fields = ['image', 'im_info'] - self._input_check(required_fields, feed_vars) - - ims = [] - for k in feed_vars.keys(): - if 'image' in k: - ims.append(feed_vars[k]) - result = {} - - if not mask_branch: - assert 'im_shape' in feed_vars, \ - "{} has no im_shape field".format(feed_vars) - result.update(feed_vars) - - for i, im in enumerate(ims): - im_info = fluid.layers.slice( - input=feed_vars['im_info'], - axes=[1], - starts=[3 * i], - ends=[3 * i + 3]) - body_feats = self.backbone(im) - result.update(body_feats) - - # FPN - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test') - if not mask_branch: - im_shape = feed_vars['im_shape'] - body_feat_names = list(body_feats.keys()) - proposal_list = [] - roi_feat_list = [] - rcnn_pred_list = [] - - proposals = None - bbox_pred = None - for i in range(3): - if i > 0: - refined_bbox = self._decode_box( - proposals, - bbox_pred, - curr_stage=i - 1, ) - else: - refined_bbox = rois - - proposals = refined_bbox - proposal_list.append(proposals) - - # extract roi features - roi_feat = self.roi_extractor(body_feats, proposals, - spatial_scale) - roi_feat_list.append(roi_feat) - - # bbox head - cls_score, bbox_pred = self.bbox_head.get_output( - roi_feat, - wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i], - name='_' + str(i + 1) if i > 0 else '') - rcnn_pred_list.append((cls_score, bbox_pred)) - - # get mask rois - if self.fpn is None: - body_feat = body_feats[body_feat_names[-1]] - pred = self.bbox_head.get_prediction( - im_info, - im_shape, - roi_feat_list, - rcnn_pred_list, - proposal_list, - 
self.cascade_bbox_reg_weights, - return_box_score=True) - bbox_name = 'bbox_' + str(i) - score_name = 'score_' + str(i) - if 'flip' in im.name: - bbox_name += '_flip' - score_name += '_flip' - result[bbox_name] = pred['bbox'] - result[score_name] = pred['score'] - else: - mask_name = 'mask_pred_' + str(i) - bbox_pred = feed_vars['bbox'] - result.update({im.name: im}) - if 'flip' in im.name: - mask_name += '_flip' - bbox_pred = feed_vars['bbox_flip'] - mask_pred, bbox_pred = self.single_scale_eval( - body_feats, - spatial_scale, - im_info, - mask_name, - bbox_pred=bbox_pred, - use_multi_test=True) - result[mask_name] = mask_pred - return result - - def single_scale_eval(self, - body_feats, - spatial_scale, - im_info, - mask_name, - bbox_pred, - roi_feat_list=None, - rcnn_pred_list=None, - proposal_list=None, - im_shape=None, - use_multi_test=False): - if self.fpn is None: - last_feat = body_feats[list(body_feats.keys())[-1]] - if not use_multi_test: - bbox_pred = self.bbox_head.get_prediction( - im_info, im_shape, roi_feat_list, rcnn_pred_list, proposal_list, - self.cascade_bbox_reg_weights) - bbox_pred = bbox_pred['bbox'] - - # share weight - bbox_shape = fluid.layers.shape(bbox_pred) - bbox_size = fluid.layers.reduce_prod(bbox_shape) - bbox_size = fluid.layers.reshape(bbox_size, [1, 1]) - size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32') - cond = fluid.layers.less_than(x=bbox_size, y=size) - - mask_pred = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=False, - name=mask_name) - with fluid.layers.control_flow.Switch() as switch: - with switch.case(cond): - fluid.layers.assign(input=bbox_pred, output=mask_pred) - with switch.default(): - bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6]) - - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, bbox) - - mask_rois = bbox * im_scale - if self.fpn is None: - mask_feat = 
self.roi_extractor(last_feat, mask_rois) - mask_feat = self.bbox_head.get_head_feat(mask_feat) - else: - mask_feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) - - mask_out = self.mask_head.get_prediction(mask_feat, bbox) - fluid.layers.assign(input=mask_out, output=mask_pred) - return mask_pred, bbox_pred - - def _input_check(self, require_fields, feed_vars): - for var in require_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) - - def _decode_box(self, proposals, bbox_pred, curr_stage): - rcnn_loc_delta_r = fluid.layers.reshape( - bbox_pred, (-1, self.cls_agnostic_bbox_reg, 4)) - # only use fg box delta to decode box - rcnn_loc_delta_s = fluid.layers.slice( - rcnn_loc_delta_r, axes=[1], starts=[1], ends=[2]) - refined_bbox = fluid.layers.box_coder( - prior_box=proposals, - prior_box_var=self.cascade_bbox_reg_weights[curr_stage], - target_box=rcnn_loc_delta_s, - code_type='decode_center_size', - box_normalized=False, - axis=1, ) - refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) - - return refined_bbox - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars, multi_scale=None, mask_branch=False): - if multi_scale: - return self.build_multi_scale(feed_vars, mask_branch) - return self.build(feed_vars, 'test') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py deleted file mode 100644 index b80a8d7f62ccd13f632fe8124372156d656d2abc..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['CascadeRCNN'] - - -@register -class CascadeRCNN(object): - """ - Cascade R-CNN architecture, see https://arxiv.org/abs/1712.00726 - - Args: - backbone (object): backbone instance - rpn_head (object): `RPNhead` instance - bbox_assigner (object): `BBoxAssigner` instance - roi_extractor (object): ROI extractor instance - bbox_head (object): `BBoxHead` instance - fpn (object): feature pyramid network instance - """ - - __category__ = 'architecture' - __inject__ = [ - 'backbone', 'fpn', 'rpn_head', 'bbox_assigner', 'roi_extractor', - 'bbox_head' - ] - - def __init__(self, - backbone, - rpn_head, - roi_extractor='FPNRoIAlign', - bbox_head='CascadeBBoxHead', - bbox_assigner='CascadeBBoxAssigner', - rpn_only=False, - fpn='FPN'): - super(CascadeRCNN, self).__init__() - assert fpn is not None, "cascade RCNN requires FPN" - self.backbone = backbone - self.fpn = fpn - self.rpn_head = rpn_head - self.bbox_assigner = bbox_assigner - self.roi_extractor = roi_extractor - self.bbox_head = bbox_head - self.rpn_only = rpn_only - # Cascade local cfg - self.cls_agnostic_bbox_reg = 2 - (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights - 
self.cascade_bbox_reg_weights = [ - [1. / brw0, 1. / brw0, 2. / brw0, 2. / brw0], - [1. / brw1, 1. / brw1, 2. / brw1, 2. / brw1], - [1. / brw2, 1. / brw2, 2. / brw2, 2. / brw2] - ] - self.cascade_rcnn_loss_weight = [1.0, 0.5, 0.25] - - def build(self, feed_vars, mode='train'): - if mode == 'train': - required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] - else: - required_fields = ['im_shape', 'im_info'] - self._input_check(required_fields, feed_vars) - - im = feed_vars['image'] - im_info = feed_vars['im_info'] - - if mode == 'train': - gt_box = feed_vars['gt_box'] - is_crowd = feed_vars['is_crowd'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - # backbone - body_feats = self.backbone(im) - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32')) - for k, v in body_feats.items()) - - # FPN - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - # rpn proposals - rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) - - if mode == 'train': - rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) - else: - if self.rpn_only: - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois) - rois = rpn_rois / im_scale - return {'proposal': rois} - - proposal_list = [] - roi_feat_list = [] - rcnn_pred_list = [] - rcnn_target_list = [] - - proposals = None - bbox_pred = None - for i in range(3): - if i > 0: - refined_bbox = self._decode_box( - proposals, - bbox_pred, - curr_stage=i - 1, ) - else: - refined_bbox = rpn_rois - - if mode == 'train': - outs = self.bbox_assigner( - input_rois=refined_bbox, feed_vars=feed_vars, curr_stage=i) - - proposals = outs[0] - rcnn_target_list.append(outs) - else: - proposals = refined_bbox - 
proposal_list.append(proposals) - - # extract roi features - roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale) - roi_feat_list.append(roi_feat) - - # bbox head - cls_score, bbox_pred = self.bbox_head.get_output( - roi_feat, - wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i], - name='_' + str(i + 1) if i > 0 else '') - rcnn_pred_list.append((cls_score, bbox_pred)) - - if mode == 'train': - loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list, - self.cascade_rcnn_loss_weight) - loss.update(rpn_loss) - total_loss = fluid.layers.sum(list(loss.values())) - loss.update({'loss': total_loss}) - return loss - else: - pred = self.bbox_head.get_prediction( - im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list, - proposal_list, self.cascade_bbox_reg_weights, - self.cls_agnostic_bbox_reg) - return pred - - def build_multi_scale(self, feed_vars): - required_fields = ['image', 'im_shape', 'im_info'] - self._input_check(required_fields, feed_vars) - ims = [] - for k in feed_vars.keys(): - if 'image' in k: - ims.append(feed_vars[k]) - result = {} - result.update(feed_vars) - for i, im in enumerate(ims): - im_info = fluid.layers.slice( - input=feed_vars['im_info'], - axes=[1], - starts=[3 * i], - ends=[3 * i + 3]) - im_shape = feed_vars['im_shape'] - - # backbone - body_feats = self.backbone(im) - result.update(body_feats) - body_feat_names = list(body_feats.keys()) - - # FPN - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - # rpn proposals - rpn_rois = self.rpn_head.get_proposals( - body_feats, im_info, mode='test') - - proposal_list = [] - roi_feat_list = [] - rcnn_pred_list = [] - - proposals = None - bbox_pred = None - for i in range(3): - if i > 0: - refined_bbox = self._decode_box( - proposals, - bbox_pred, - curr_stage=i - 1, ) - else: - refined_bbox = rpn_rois - - proposals = refined_bbox - proposal_list.append(proposals) - - # extract roi features - roi_feat = self.roi_extractor(body_feats, 
proposals, - spatial_scale) - roi_feat_list.append(roi_feat) - - # bbox head - cls_score, bbox_pred = self.bbox_head.get_output( - roi_feat, - wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i], - name='_' + str(i + 1) if i > 0 else '') - rcnn_pred_list.append((cls_score, bbox_pred)) - - # get mask rois - rois = proposal_list[2] - - if self.fpn is None: - last_feat = body_feats[list(body_feats.keys())[-1]] - roi_feat = self.roi_extractor(last_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) - - pred = self.bbox_head.get_prediction( - im_info, - im_shape, - roi_feat_list, - rcnn_pred_list, - proposal_list, - self.cascade_bbox_reg_weights, - self.cls_agnostic_bbox_reg, - return_box_score=True) - bbox_name = 'bbox_' + str(i) - score_name = 'score_' + str(i) - if 'flip' in im.name: - bbox_name += '_flip' - score_name += '_flip' - result[bbox_name] = pred['bbox'] - result[score_name] = pred['score'] - return result - - def _input_check(self, require_fields, feed_vars): - for var in require_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) - - def _decode_box(self, proposals, bbox_pred, curr_stage): - rcnn_loc_delta_r = fluid.layers.reshape( - bbox_pred, (-1, self.cls_agnostic_bbox_reg, 4)) - # only use fg box delta to decode box - rcnn_loc_delta_s = fluid.layers.slice( - rcnn_loc_delta_r, axes=[1], starts=[1], ends=[2]) - refined_bbox = fluid.layers.box_coder( - prior_box=proposals, - prior_box_var=self.cascade_bbox_reg_weights[curr_stage], - target_box=rcnn_loc_delta_s, - code_type='decode_center_size', - box_normalized=False, - axis=1, ) - refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) - - return refined_bbox - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars, multi_scale=None): - if multi_scale: - return self.build_multi_scale(feed_vars) - return self.build(feed_vars, 'test') - - def test(self, feed_vars): - return 
self.build(feed_vars, 'test') diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faceboxes.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faceboxes.py deleted file mode 100644 index 194b3a7e864f30051a20e514bbf9a1e970548afc..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faceboxes.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from paddle import fluid - -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -from ppdet.core.workspace import register -from ppdet.modeling.ops import SSDOutputDecoder - -__all__ = ['FaceBoxes'] - - -@register -class FaceBoxes(object): - """ - FaceBoxes: Sub-millisecond Neural Face Detection on Mobile GPUs, - see https://https://arxiv.org/abs/1708.05234 - - Args: - backbone (object): backbone instance - output_decoder (object): `SSDOutputDecoder` instance - densities (list|None): the densities of generated density prior boxes, - this attribute should be a list or tuple of integers. - fixed_sizes (list|None): the fixed sizes of generated density prior boxes, - this attribute should a list or tuple of same length with `densities`. 
- num_classes (int): number of output classes - """ - - __category__ = 'architecture' - __inject__ = ['backbone', 'output_decoder'] - __shared__ = ['num_classes'] - - def __init__(self, - backbone="FaceBoxNet", - output_decoder=SSDOutputDecoder().__dict__, - densities=[[4, 2, 1], [1], [1]], - fixed_sizes=[[32., 64., 128.], [256.], [512.]], - num_classes=2): - super(FaceBoxes, self).__init__() - self.backbone = backbone - self.num_classes = num_classes - self.output_decoder = output_decoder - if isinstance(output_decoder, dict): - self.output_decoder = SSDOutputDecoder(**output_decoder) - self.densities = densities - self.fixed_sizes = fixed_sizes - - def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - if mode == 'train': - gt_box = feed_vars['gt_box'] - gt_label = feed_vars['gt_label'] - - body_feats = self.backbone(im) - locs, confs, box, box_var = self._multi_box_head( - inputs=body_feats, image=im, num_classes=self.num_classes) - - if mode == 'train': - loss = fluid.layers.ssd_loss( - locs, - confs, - gt_box, - gt_label, - box, - box_var, - overlap_threshold=0.35, - neg_overlap=0.35) - loss = fluid.layers.reduce_sum(loss) - loss.persistable = True - return {'loss': loss} - else: - pred = self.output_decoder(locs, confs, box, box_var) - return {'bbox': pred} - - def _multi_box_head(self, inputs, image, num_classes=2): - def permute_and_reshape(input, last_dim): - trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1]) - compile_shape = [0, -1, last_dim] - return fluid.layers.reshape(trans, shape=compile_shape) - - def _is_list_or_tuple_(data): - return (isinstance(data, list) or isinstance(data, tuple)) - - locs, confs = [], [] - boxes, vars = [], [] - b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.)) - - for i, input in enumerate(inputs): - densities = self.densities[i] - fixed_sizes = self.fixed_sizes[i] - box, var = fluid.layers.density_prior_box( - input, - image, - densities=densities, - fixed_sizes=fixed_sizes, - 
fixed_ratios=[1.], - clip=False, - offset=0.5) - - num_boxes = box.shape[2] - - box = fluid.layers.reshape(box, shape=[-1, 4]) - var = fluid.layers.reshape(var, shape=[-1, 4]) - num_loc_output = num_boxes * 4 - num_conf_output = num_boxes * num_classes - # get loc - mbox_loc = fluid.layers.conv2d( - input, num_loc_output, 3, 1, 1, bias_attr=b_attr) - loc = permute_and_reshape(mbox_loc, 4) - # get conf - mbox_conf = fluid.layers.conv2d( - input, num_conf_output, 3, 1, 1, bias_attr=b_attr) - conf = permute_and_reshape(mbox_conf, 2) - - locs.append(loc) - confs.append(conf) - boxes.append(box) - vars.append(var) - - face_mbox_loc = fluid.layers.concat(locs, axis=1) - face_mbox_conf = fluid.layers.concat(confs, axis=1) - prior_boxes = fluid.layers.concat(boxes) - box_vars = fluid.layers.concat(vars) - return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars): - return self.build(feed_vars, 'eval') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') - - def is_bbox_normalized(self): - return True diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py deleted file mode 100644 index e0ef7355c0d358d7b409ce3080f2416cd38de0b3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/faster_rcnn.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['FasterRCNN'] - - -@register -class FasterRCNN(object): - """ - Faster R-CNN architecture, see https://arxiv.org/abs/1506.01497 - Args: - backbone (object): backbone instance - rpn_head (object): `RPNhead` instance - bbox_assigner (object): `BBoxAssigner` instance - roi_extractor (object): ROI extractor instance - bbox_head (object): `BBoxHead` instance - fpn (object): feature pyramid network instance - """ - - __category__ = 'architecture' - __inject__ = [ - 'backbone', 'rpn_head', 'bbox_assigner', 'roi_extractor', 'bbox_head', - 'fpn' - ] - - def __init__(self, - backbone, - rpn_head, - roi_extractor, - bbox_head='BBoxHead', - bbox_assigner='BBoxAssigner', - rpn_only=False, - fpn=None): - super(FasterRCNN, self).__init__() - self.backbone = backbone - self.rpn_head = rpn_head - self.bbox_assigner = bbox_assigner - self.roi_extractor = roi_extractor - self.bbox_head = bbox_head - self.fpn = fpn - self.rpn_only = rpn_only - - def build(self, feed_vars, mode='train'): - if mode == 'train': - required_fields = ['gt_label', 'gt_box', 'is_crowd', 'im_info'] - else: - required_fields = ['im_shape', 'im_info'] - self._input_check(required_fields, feed_vars) - - im = feed_vars['image'] - im_info = feed_vars['im_info'] - if mode == 'train': - gt_box = 
feed_vars['gt_box'] - is_crowd = feed_vars['is_crowd'] - else: - im_shape = feed_vars['im_shape'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - body_feats = self.backbone(im) - body_feat_names = list(body_feats.keys()) - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32')) - for k, v in body_feats.items()) - - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) - - if mode == 'train': - rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) - # sampled rpn proposals - for var in ['gt_label', 'is_crowd', 'gt_box', 'im_info']: - assert var in feed_vars, "{} has no {}".format(feed_vars, var) - outs = self.bbox_assigner( - rpn_rois=rois, - gt_classes=feed_vars['gt_label'], - is_crowd=feed_vars['is_crowd'], - gt_boxes=feed_vars['gt_box'], - im_info=feed_vars['im_info']) - - rois = outs[0] - labels_int32 = outs[1] - bbox_targets = outs[2] - bbox_inside_weights = outs[3] - bbox_outside_weights = outs[4] - else: - if self.rpn_only: - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - rois = rois / im_scale - return {'proposal': rois} - if self.fpn is None: - # in models without FPN, roi extractor only uses the last level of - # feature maps. And body_feat_names[-1] represents the name of - # last feature map. 
- body_feat = body_feats[body_feat_names[-1]] - roi_feat = self.roi_extractor(body_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) - - if mode == 'train': - loss = self.bbox_head.get_loss(roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, - bbox_outside_weights) - loss.update(rpn_loss) - total_loss = fluid.layers.sum(list(loss.values())) - loss.update({'loss': total_loss}) - return loss - else: - pred = self.bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - return pred - - def build_multi_scale(self, feed_vars): - required_fields = ['image', 'im_info', 'im_shape'] - self._input_check(required_fields, feed_vars) - ims = [] - for k in feed_vars.keys(): - if 'image' in k: - ims.append(feed_vars[k]) - result = {} - result.update(feed_vars) - for i, im in enumerate(ims): - im_info = fluid.layers.slice( - input=feed_vars['im_info'], - axes=[1], - starts=[3 * i], - ends=[3 * i + 3]) - im_shape = feed_vars['im_shape'] - body_feats = self.backbone(im) - result.update(body_feats) - body_feat_names = list(body_feats.keys()) - - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test') - - if self.fpn is None: - # in models without FPN, roi extractor only uses the last level of - # feature maps. And body_feat_names[-1] represents the name of - # last feature map. 
- body_feat = body_feats[body_feat_names[-1]] - roi_feat = self.roi_extractor(body_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) - - pred = self.bbox_head.get_prediction( - roi_feat, rois, im_info, im_shape, return_box_score=True) - bbox_name = 'bbox_' + str(i) - score_name = 'score_' + str(i) - if 'flip' in im.name: - bbox_name += '_flip' - score_name += '_flip' - result[bbox_name] = pred['bbox'] - result[score_name] = pred['score'] - return result - - def _input_check(self, require_fields, feed_vars): - for var in require_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars, multi_scale=None): - if multi_scale: - return self.build_multi_scale(feed_vars) - return self.build(feed_vars, 'test') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py deleted file mode 100644 index 715f3efa90e52c3330f36e9ba787082722f2c8bb..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/mask_rcnn.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['MaskRCNN'] - - -@register -class MaskRCNN(object): - """ - Mask R-CNN architecture, see https://arxiv.org/abs/1703.06870 - Args: - backbone (object): backbone instance - rpn_head (object): `RPNhead` instance - bbox_assigner (object): `BBoxAssigner` instance - roi_extractor (object): ROI extractor instance - bbox_head (object): `BBoxHead` instance - mask_assigner (object): `MaskAssigner` instance - mask_head (object): `MaskHead` instance - fpn (object): feature pyramid network instance - """ - - __category__ = 'architecture' - __inject__ = [ - 'backbone', 'rpn_head', 'bbox_assigner', 'roi_extractor', 'bbox_head', - 'mask_assigner', 'mask_head', 'fpn' - ] - - def __init__(self, - backbone, - rpn_head, - bbox_head='BBoxHead', - bbox_assigner='BBoxAssigner', - roi_extractor='RoIAlign', - mask_assigner='MaskAssigner', - mask_head='MaskHead', - rpn_only=False, - fpn=None): - super(MaskRCNN, self).__init__() - self.backbone = backbone - self.rpn_head = rpn_head - self.bbox_assigner = bbox_assigner - self.roi_extractor = roi_extractor - self.bbox_head = bbox_head - self.mask_assigner = mask_assigner - self.mask_head = mask_head - self.rpn_only = rpn_only - self.fpn = fpn - - def build(self, feed_vars, mode='train'): - if mode == 'train': - required_fields = [ - 'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info' - ] - else: - required_fields = ['im_shape', 'im_info'] - self._input_check(required_fields, feed_vars) - im = feed_vars['image'] - im_info = feed_vars['im_info'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - # backbone - body_feats = 
self.backbone(im) - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32')) - for k, v in body_feats.items()) - - # FPN - spatial_scale = None - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - # RPN proposals - rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode) - - if mode == 'train': - rpn_loss = self.rpn_head.get_loss(im_info, feed_vars['gt_box'], - feed_vars['is_crowd']) - - outs = self.bbox_assigner( - rpn_rois=rois, - gt_classes=feed_vars['gt_label'], - is_crowd=feed_vars['is_crowd'], - gt_boxes=feed_vars['gt_box'], - im_info=feed_vars['im_info']) - rois = outs[0] - labels_int32 = outs[1] - - if self.fpn is None: - last_feat = body_feats[list(body_feats.keys())[-1]] - roi_feat = self.roi_extractor(last_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) - - loss = self.bbox_head.get_loss(roi_feat, labels_int32, *outs[2:]) - loss.update(rpn_loss) - - mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner( - rois=rois, - gt_classes=feed_vars['gt_label'], - is_crowd=feed_vars['is_crowd'], - gt_segms=feed_vars['gt_mask'], - im_info=feed_vars['im_info'], - labels_int32=labels_int32) - if self.fpn is None: - bbox_head_feat = self.bbox_head.get_head_feat() - feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32) - else: - feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) - - mask_loss = self.mask_head.get_loss(feat, mask_int32) - loss.update(mask_loss) - - total_loss = fluid.layers.sum(list(loss.values())) - loss.update({'loss': total_loss}) - return loss - - else: - if self.rpn_only: - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - rois = rois / im_scale - return {'proposal': rois} - mask_name = 'mask_pred' - mask_pred, bbox_pred = self.single_scale_eval( - body_feats, mask_name, 
rois, im_info, feed_vars['im_shape'], - spatial_scale) - return {'bbox': bbox_pred, 'mask': mask_pred} - - def build_multi_scale(self, feed_vars, mask_branch=False): - required_fields = ['image', 'im_info'] - self._input_check(required_fields, feed_vars) - - ims = [] - for k in feed_vars.keys(): - if 'image' in k: - ims.append(feed_vars[k]) - result = {} - - if not mask_branch: - assert 'im_shape' in feed_vars, \ - "{} has no im_shape field".format(feed_vars) - result.update(feed_vars) - - for i, im in enumerate(ims): - im_info = fluid.layers.slice( - input=feed_vars['im_info'], - axes=[1], - starts=[3 * i], - ends=[3 * i + 3]) - body_feats = self.backbone(im) - result.update(body_feats) - - # FPN - if self.fpn is not None: - body_feats, spatial_scale = self.fpn.get_output(body_feats) - rois = self.rpn_head.get_proposals(body_feats, im_info, mode='test') - if not mask_branch: - im_shape = feed_vars['im_shape'] - body_feat_names = list(body_feats.keys()) - if self.fpn is None: - body_feat = body_feats[body_feat_names[-1]] - roi_feat = self.roi_extractor(body_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, - spatial_scale) - pred = self.bbox_head.get_prediction( - roi_feat, rois, im_info, im_shape, return_box_score=True) - bbox_name = 'bbox_' + str(i) - score_name = 'score_' + str(i) - if 'flip' in im.name: - bbox_name += '_flip' - score_name += '_flip' - result[bbox_name] = pred['bbox'] - result[score_name] = pred['score'] - else: - mask_name = 'mask_pred_' + str(i) - bbox_pred = feed_vars['bbox'] - result.update({im.name: im}) - if 'flip' in im.name: - mask_name += '_flip' - bbox_pred = feed_vars['bbox_flip'] - mask_pred, bbox_pred = self.single_scale_eval( - body_feats, mask_name, rois, im_info, feed_vars['im_shape'], - spatial_scale, bbox_pred) - result[mask_name] = mask_pred - return result - - def single_scale_eval(self, - body_feats, - mask_name, - rois, - im_info, - im_shape, - spatial_scale, - bbox_pred=None): - if self.fpn is None: - 
last_feat = body_feats[list(body_feats.keys())[-1]] - roi_feat = self.roi_extractor(last_feat, rois) - else: - roi_feat = self.roi_extractor(body_feats, rois, spatial_scale) - if not bbox_pred: - bbox_pred = self.bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - bbox_pred = bbox_pred['bbox'] - - # share weight - bbox_shape = fluid.layers.shape(bbox_pred) - bbox_size = fluid.layers.reduce_prod(bbox_shape) - bbox_size = fluid.layers.reshape(bbox_size, [1, 1]) - size = fluid.layers.fill_constant([1, 1], value=6, dtype='int32') - cond = fluid.layers.less_than(x=bbox_size, y=size) - - mask_pred = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=False, - name=mask_name) - with fluid.layers.control_flow.Switch() as switch: - with switch.case(cond): - fluid.layers.assign(input=bbox_pred, output=mask_pred) - with switch.default(): - bbox = fluid.layers.slice(bbox_pred, [1], starts=[2], ends=[6]) - - im_scale = fluid.layers.slice( - im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, bbox) - - mask_rois = bbox * im_scale - if self.fpn is None: - mask_feat = self.roi_extractor(last_feat, mask_rois) - mask_feat = self.bbox_head.get_head_feat(mask_feat) - else: - mask_feat = self.roi_extractor( - body_feats, mask_rois, spatial_scale, is_mask=True) - - mask_out = self.mask_head.get_prediction(mask_feat, bbox) - fluid.layers.assign(input=mask_out, output=mask_pred) - return mask_pred, bbox_pred - - def _input_check(self, require_fields, feed_vars): - for var in require_fields: - assert var in feed_vars, \ - "{} has no {} field".format(feed_vars, var) - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars, multi_scale=None, mask_branch=False): - if multi_scale: - return self.build_multi_scale(feed_vars, mask_branch) - return self.build(feed_vars, 'test') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') diff --git 
a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/retinanet.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/retinanet.py deleted file mode 100644 index 4ce5ac500c65fa713b3735b480e8e40b9b123063..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/retinanet.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['RetinaNet'] - - -@register -class RetinaNet(object): - """ - RetinaNet architecture, see https://arxiv.org/abs/1708.02002 - - Args: - backbone (object): backbone instance - fpn (object): feature pyramid network instance - retina_head (object): `RetinaHead` instance - """ - - __category__ = 'architecture' - __inject__ = ['backbone', 'fpn', 'retina_head'] - - def __init__(self, backbone, fpn, retina_head): - super(RetinaNet, self).__init__() - self.backbone = backbone - self.fpn = fpn - self.retina_head = retina_head - - def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - im_info = feed_vars['im_info'] - if mode == 'train': - gt_box = feed_vars['gt_box'] - gt_label = feed_vars['gt_label'] - 
is_crowd = feed_vars['is_crowd'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - # backbone - body_feats = self.backbone(im) - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32')) - for k, v in body_feats.items()) - - # FPN - body_feats, spatial_scale = self.fpn.get_output(body_feats) - - # retinanet head - if mode == 'train': - loss = self.retina_head.get_loss(body_feats, spatial_scale, im_info, - gt_box, gt_label, is_crowd) - total_loss = fluid.layers.sum(list(loss.values())) - loss.update({'loss': total_loss}) - return loss - else: - pred = self.retina_head.get_prediction(body_feats, spatial_scale, - im_info) - return pred - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars): - return self.build(feed_vars, 'test') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/ssd.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/ssd.py deleted file mode 100644 index e899075f21291a38a5af0e4c8d5e70af4e55eaec..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/ssd.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register -from ppdet.modeling.ops import SSDOutputDecoder - -__all__ = ['SSD'] - - -@register -class SSD(object): - """ - Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325 - - Args: - backbone (object): backbone instance - multi_box_head (object): `MultiBoxHead` instance - output_decoder (object): `SSDOutputDecoder` instance - num_classes (int): number of output classes - """ - - __category__ = 'architecture' - __inject__ = ['backbone', 'multi_box_head', 'output_decoder'] - __shared__ = ['num_classes'] - - def __init__(self, - backbone, - multi_box_head='MultiBoxHead', - output_decoder=SSDOutputDecoder().__dict__, - num_classes=21): - super(SSD, self).__init__() - self.backbone = backbone - self.multi_box_head = multi_box_head - self.num_classes = num_classes - self.output_decoder = output_decoder - if isinstance(output_decoder, dict): - self.output_decoder = SSDOutputDecoder(**output_decoder) - - def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - if mode == 'train' or mode == 'eval': - gt_box = feed_vars['gt_box'] - gt_label = feed_vars['gt_label'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - # backbone - body_feats = self.backbone(im) - - if isinstance(body_feats, OrderedDict): - body_feat_names = list(body_feats.keys()) - body_feats = [body_feats[name] for name in body_feat_names] - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats] - - locs, 
confs, box, box_var = self.multi_box_head( - inputs=body_feats, image=im, num_classes=self.num_classes) - - if mode == 'train': - loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, - box_var) - loss = fluid.layers.reduce_sum(loss) - return {'loss': loss} - else: - pred = self.output_decoder(locs, confs, box, box_var) - return {'bbox': pred} - - def train(self, feed_vars): - return self.build(feed_vars, 'train') - - def eval(self, feed_vars): - return self.build(feed_vars, 'eval') - - def test(self, feed_vars): - return self.build(feed_vars, 'test') - - def is_bbox_normalized(self): - # SSD use output_decoder in output layers, bbox is normalized - # to range [0, 1], is_bbox_normalized is used in eval.py and infer.py - return True diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/yolov3.py b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/yolov3.py deleted file mode 100644 index 2912ffda5215af594d57255397b8a572455aa090..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/yolov3.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid - -from ppdet.experimental import mixed_precision_global_state -from ppdet.core.workspace import register - -__all__ = ['YOLOv3'] - - -@register -class YOLOv3(object): - """ - YOLOv3 network, see https://arxiv.org/abs/1804.02767 - - Args: - backbone (object): an backbone instance - yolo_head (object): an `YOLOv3Head` instance - """ - - __category__ = 'architecture' - __inject__ = ['backbone', 'yolo_head'] - - def __init__(self, backbone, yolo_head='YOLOv3Head'): - super(YOLOv3, self).__init__() - self.backbone = backbone - self.yolo_head = yolo_head - - def build(self, feed_vars, mode='train'): - im = feed_vars['image'] - - mixed_precision_enabled = mixed_precision_global_state() is not None - - # cast inputs to FP16 - if mixed_precision_enabled: - im = fluid.layers.cast(im, 'float16') - - body_feats = self.backbone(im) - - if isinstance(body_feats, OrderedDict): - body_feat_names = list(body_feats.keys()) - body_feats = [body_feats[name] for name in body_feat_names] - - # cast features back to FP32 - if mixed_precision_enabled: - body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats] - - if mode == 'train': - gt_box = feed_vars['gt_box'] - gt_label = feed_vars['gt_label'] - gt_score = feed_vars['gt_score'] - - return { - 'loss': self.yolo_head.get_loss(body_feats, gt_box, gt_label, - gt_score) - } - else: - im_size = feed_vars['im_size'] - return self.yolo_head.get_prediction(body_feats, im_size) - - def train(self, feed_vars): - return self.build(feed_vars, mode='train') - - def eval(self, feed_vars): - return self.build(feed_vars, mode='test') - - def test(self, feed_vars): - return self.build(feed_vars, mode='test') diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/__init__.py deleted file mode 
100644 index 48081429fecd83ad086fb66fb7eac99e0cc96fb5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -from . import resnet -from . import resnext -from . import darknet -from . import mobilenet -from . import senet -from . import fpn -from . import vgg -from . import blazenet -from . import faceboxnet - -from .resnet import * -from .resnext import * -from .darknet import * -from .mobilenet import * -from .senet import * -from .fpn import * -from .vgg import * -from .blazenet import * -from .faceboxnet import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/blazenet.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/blazenet.py deleted file mode 100644 index 54c3f7e262464661f39fb73a9c5c70eabe4955c9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/blazenet.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
@register
class BlazeNet(object):
    """
    BlazeFace, see https://arxiv.org/abs/1907.05047

    Args:
        blaze_filters (list): number of filter for each blaze block; each
            entry is [in_channels, out_channels] or
            [in_channels, out_channels, stride]
        double_blaze_filters (list): number of filter for each double_blaze
            block; each entry is [in_channels, mid_channels, out_channels]
            or [in_channels, mid_channels, out_channels, stride]
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether or not is blazeface-lite
    """

    # NOTE: the list defaults below are only read, never mutated, so sharing
    # them between instances is harmless. They are kept as literals so the
    # constructor signature stays exactly as callers/configs see it.
    def __init__(
            self,
            blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]],
            double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
                                  [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]],
            with_extra_blocks=True,
            lite_edition=False):
        super(BlazeNet, self).__init__()

        self.blaze_filters = blaze_filters
        self.double_blaze_filters = double_blaze_filters
        self.with_extra_blocks = with_extra_blocks
        self.lite_edition = lite_edition

    def __call__(self, input):
        """
        Build the backbone on top of `input`.

        Returns:
            Standard edition: the last feature map when `with_extra_blocks`
            is False, otherwise the tuple (second-to-last collected feature
            map, last feature map).
            Lite edition: always the tuple (conv7, conv13).
        """
        if not self.lite_edition:
            conv1_num_filters = self.blaze_filters[0][0]
            conv = self._conv_norm(
                input=input,
                num_filters=conv1_num_filters,
                filter_size=3,
                stride=2,
                padding=1,
                act='relu',
                name="conv1")

            for k, v in enumerate(self.blaze_filters):
                assert len(v) in [2, 3], \
                    "blaze_filters {} not in [2, 3]".format(v)
                if len(v) == 2:
                    conv = self.BlazeBlock(
                        conv, v[0], v[1], name='blaze_{}'.format(k))
                elif len(v) == 3:
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        stride=v[2],
                        name='blaze_{}'.format(k))

            layers = []
            for k, v in enumerate(self.double_blaze_filters):
                assert len(v) in [3, 4], \
                    "double_blaze_filters {} not in [3, 4]".format(v)
                if len(v) == 3:
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        double_channels=v[2],
                        name='double_blaze_{}'.format(k))
                elif len(v) == 4:
                    # A 4-element entry carries a stride: remember the
                    # feature map as it is just before the strided block.
                    layers.append(conv)
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        double_channels=v[2],
                        stride=v[3],
                        name='double_blaze_{}'.format(k))
            layers.append(conv)

            if not self.with_extra_blocks:
                return layers[-1]
            return layers[-2], layers[-1]
        else:
            conv1 = self._conv_norm(
                input=input,
                num_filters=24,
                filter_size=5,
                stride=2,
                padding=2,
                act='relu',
                name="conv1")
            conv2 = self.Blaze_lite(conv1, 24, 24, 1, 'conv2')
            conv3 = self.Blaze_lite(conv2, 24, 28, 1, 'conv3')
            conv4 = self.Blaze_lite(conv3, 28, 32, 2, 'conv4')
            conv5 = self.Blaze_lite(conv4, 32, 36, 1, 'conv5')
            conv6 = self.Blaze_lite(conv5, 36, 42, 1, 'conv6')
            conv7 = self.Blaze_lite(conv6, 42, 48, 2, 'conv7')
            # conv8 .. conv12: widen by 8 channels per block, 48 -> 88.
            in_ch = 48
            for i in range(5):
                conv7 = self.Blaze_lite(conv7, in_ch, in_ch + 8, 1,
                                        'conv{}'.format(8 + i))
                in_ch += 8
            assert in_ch == 88
            conv13 = self.Blaze_lite(conv7, 88, 96, 2, 'conv13')
            # conv14 .. conv17 keep 96 channels.
            for i in range(4):
                conv13 = self.Blaze_lite(conv13, 96, 96, 1,
                                         'conv{}'.format(14 + i))

            return conv7, conv13

    def BlazeBlock(self,
                   input,
                   in_channels,
                   out_channels,
                   double_channels=None,
                   stride=1,
                   use_5x5kernel=True,
                   name=None):
        """
        Build one blaze block (or double blaze block) with a residual add.

        Args:
            input (Variable): input feature map
            in_channels (int): filters/groups of the first depthwise stage
            out_channels (int): filters of the first pointwise conv
            double_channels (int|None): when not None, a second
                depthwise + pointwise stage is appended and this is its
                output filter count
            stride (int): 1 or 2; with 2 the shortcut is max-pooled and
                passed through a 1x1 conv
            use_5x5kernel (bool): use one 5x5 conv per depthwise stage,
                otherwise two stacked 3x3 convs
            name (str): prefix for the conv weight names (required; it is
                concatenated with suffixes)

        Returns:
            Variable: relu(shortcut + main branch)
        """
        assert stride in [1, 2]
        use_pool = not stride == 1
        use_double_block = double_channels is not None
        # In a double block the first pointwise conv is followed by relu;
        # in a single block it feeds the residual add directly.
        act = 'relu' if use_double_block else None

        if use_5x5kernel:
            conv_dw = self._conv_norm(
                input=input,
                filter_size=5,
                num_filters=in_channels,
                stride=stride,
                padding=2,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw")
        else:
            conv_dw_1 = self._conv_norm(
                input=input,
                filter_size=3,
                num_filters=in_channels,
                stride=1,
                padding=1,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw_1")
            conv_dw = self._conv_norm(
                input=conv_dw_1,
                filter_size=3,
                num_filters=in_channels,
                stride=stride,
                padding=1,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw_2")

        conv_pw = self._conv_norm(
            input=conv_dw,
            filter_size=1,
            num_filters=out_channels,
            stride=1,
            padding=0,
            act=act,
            name=name + "1_sep")

        if use_double_block:
            if use_5x5kernel:
                # NOTE(review): unlike every other "*_dw" conv in this
                # method, this one passes no num_groups, so it is built
                # with groups=1. Left untouched because changing it would
                # change the weight shape -- confirm whether intended.
                conv_dw = self._conv_norm(
                    input=conv_pw,
                    filter_size=5,
                    num_filters=out_channels,
                    stride=1,
                    padding=2,
                    use_cudnn=False,
                    name=name + "2_dw")
            else:
                conv_dw_1 = self._conv_norm(
                    input=conv_pw,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=False,
                    name=name + "2_dw_1")
                conv_dw = self._conv_norm(
                    input=conv_dw_1,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=False,
                    name=name + "2_dw_2")

            conv_pw = self._conv_norm(
                input=conv_dw,
                filter_size=1,
                num_filters=double_channels,
                stride=1,
                padding=0,
                name=name + "2_sep")

        # shortcut
        if use_pool:
            shortcut_channel = double_channels or out_channels
            shortcut_pool = self._pooling_block(input, stride, stride)
            channel_pad = self._conv_norm(
                input=shortcut_pool,
                filter_size=1,
                num_filters=shortcut_channel,
                stride=1,
                padding=0,
                name="shortcut" + name)
            return fluid.layers.elementwise_add(
                x=channel_pad, y=conv_pw, act='relu')
        return fluid.layers.elementwise_add(x=input, y=conv_pw, act='relu')

    def Blaze_lite(self, input, in_channels, out_channels, stride=1, name=None):
        """
        Build one lite block: 3x3 depthwise conv + 1x1 conv + residual add.

        Args:
            input (Variable): input feature map
            in_channels (int): filters/groups of the depthwise conv
            out_channels (int): filters of the pointwise conv
            stride (int): 1 or 2; with 2 the shortcut is max-pooled
            name (str): prefix for the conv weight names (required)

        Returns:
            Variable: relu(shortcut + main branch)
        """
        assert stride in [1, 2]
        use_pool = not stride == 1
        use_pad = not in_channels == out_channels
        conv_dw = self._conv_norm(
            input=input,
            filter_size=3,
            num_filters=in_channels,
            stride=stride,
            padding=1,
            num_groups=in_channels,
            name=name + "_dw")

        conv_pw = self._conv_norm(
            input=conv_dw,
            filter_size=1,
            num_filters=out_channels,
            stride=1,
            padding=0,
            name=name + "_sep")

        # The shortcut has to match the main branch both spatially (pool
        # when strided) and in channels (1x1 conv when they differ).
        # Previously a strided block with in_channels == out_channels
        # added the un-pooled input to the strided branch.
        shortcut = input
        if use_pool:
            shortcut = self._pooling_block(input, stride, stride)
        if use_pad:
            shortcut = self._conv_norm(
                input=shortcut,
                filter_size=1,
                num_filters=out_channels,
                stride=1,
                padding=0,
                name="shortcut" + name)
        return fluid.layers.elementwise_add(x=shortcut, y=conv_pw, act='relu')

    def _conv_norm(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',  # None
            use_cudnn=True,
            name=None):
        """
        Bias-free conv2d followed by batch_norm with activation `act`.

        Only the conv weight is named (`name + "_weights"`, so `name` must
        be a string); the batch_norm parameters get no explicit names here.
        """
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        return fluid.layers.batch_norm(input=conv, act=act)

    def _pooling_block(self,
                       conv,
                       pool_size,
                       pool_stride,
                       pool_padding=0,
                       ceil_mode=True):
        """Max-pool `conv` with the given size, stride, padding, ceil_mode."""
        pool = fluid.layers.pool2d(
            input=conv,
            pool_size=pool_size,
            pool_type='max',
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            ceil_mode=ceil_mode)
        return pool
@register
class DarkNet(object):
    """
    DarkNet backbone, see https://pjreddie.com/darknet/yolo/

    Args:
        depth (int): network depth, currently only darknet 53 is supported
        norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
        norm_decay (float): weight decay for normalization layer weights
        weight_prefix_name (str): prefix prepended to the layer names built
            in `__call__`
    """
    __shared__ = ['norm_type', 'weight_prefix_name']

    def __init__(self,
                 depth=53,
                 norm_type='bn',
                 norm_decay=0.,
                 weight_prefix_name=''):
        assert depth in [53], "unsupported depth value"
        self.depth = depth
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        # depth -> (number of blocks per stage, block builder)
        self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)}
        self.prefix_name = weight_prefix_name

    def _conv_norm(self,
                   input,
                   ch_out,
                   filter_size,
                   stride,
                   padding,
                   act='leaky',
                   name=None):
        """Bias-free conv2d + batch_norm, then leaky relu if act == 'leaky'."""
        conv_weight_attr = ParamAttr(name=name + ".conv.weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=conv_weight_attr,
            bias_attr=False)

        bn_name = name + ".bn"
        scale_attr = ParamAttr(
            regularizer=L2Decay(float(self.norm_decay)),
            name=bn_name + '.scale')
        offset_attr = ParamAttr(
            regularizer=L2Decay(float(self.norm_decay)),
            name=bn_name + '.offset')

        normed = fluid.layers.batch_norm(
            input=conv,
            act=None,
            param_attr=scale_attr,
            bias_attr=offset_attr,
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')

        if act != 'leaky':
            return normed
        # The activation is applied as a separate op because alpha=0.1
        # cannot be passed through batch_norm's `act` argument.
        return fluid.layers.leaky_relu(x=normed, alpha=0.1)

    def _downsample(self,
                    input,
                    ch_out,
                    filter_size=3,
                    stride=2,
                    padding=1,
                    name=None):
        """Conv + norm with a default 3x3 kernel, stride 2 and padding 1."""
        conv_args = dict(
            ch_out=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            name=name)
        return self._conv_norm(input, **conv_args)

    def basicblock(self, input, ch_out, name=None):
        """1x1 conv to ch_out, 3x3 conv to 2 * ch_out, then add the input."""
        squeezed = self._conv_norm(
            input,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0,
            name=name + ".0")
        expanded = self._conv_norm(
            squeezed,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1,
            name=name + ".1")
        return fluid.layers.elementwise_add(x=input, y=expanded, act=None)

    def layer_warp(self, block_func, input, ch_out, count, name=None):
        """Chain `block_func` blocks named '<name>.0', '<name>.1', ...

        The first block is always built; blocks 1 .. count - 1 follow.
        """
        feat = block_func(input, ch_out=ch_out, name='{}.0'.format(name))
        following = six.moves.xrange(1, count)
        for idx in following:
            block_name = '{}.{}'.format(name, idx)
            feat = block_func(feat, ch_out=ch_out, name=block_name)
        return feat

    def __call__(self, input):
        """
        Get the backbone of DarkNet, that is output for the 5 stages.

        Args:
            input (Variable): input variable.

        Returns:
            list: the last variable of each stage.
        """
        block_counts, make_block = self.depth_cfg[self.depth]
        block_counts = block_counts[0:5]
        prefix = self.prefix_name

        stem = self._conv_norm(
            input=input,
            ch_out=32,
            filter_size=3,
            stride=1,
            padding=1,
            name=prefix + "yolo_input")
        feat = self._downsample(
            input=stem,
            ch_out=stem.shape[1] * 2,
            name=prefix + "yolo_input.downsample")

        stage_outputs = []
        last_stage = len(block_counts) - 1
        for i, count in enumerate(block_counts):
            stage_out = self.layer_warp(
                block_func=make_block,
                input=feat,
                ch_out=32 * 2**i,
                count=count,
                name=prefix + "stage.{}".format(i))
            stage_outputs.append(stage_out)
            # every stage except the last is followed by a downsample
            if i != last_stage:
                feat = self._downsample(
                    input=stage_out,
                    ch_out=stage_out.shape[1] * 2,
                    name=prefix + "stage.{}.downsample".format(i))
        return stage_outputs
@register
class FaceBoxNet(object):
    """
    FaceBoxes, see https://arxiv.org/abs/1708.05234

    Args:
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether or not is FaceBoxes-lite
    """

    # NOTE: the batch_norm layers built by this class are created without
    # explicit parameter names, so keep the order in which layers are
    # created stable when editing (presumably their names are auto-generated
    # in creation order -- TODO confirm).

    def __init__(self,
                 with_extra_blocks=True,
                 lite_edition=False):
        super(FaceBoxNet, self).__init__()

        self.with_extra_blocks = with_extra_blocks
        self.lite_edition = lite_edition

    def __call__(self, input):
        """Build the lite or the original network depending on `lite_edition`."""
        if self.lite_edition:
            return self._simplified_edition(input)
        else:
            return self._original_edition(input)

    def _simplified_edition(self, input):
        """
        Build the lite network.

        Returns:
            conv_3_2 alone when `with_extra_blocks` is False, otherwise the
            tuple (last inception output, conv_3_2).
        """
        conv_1_1 = self._conv_norm_crelu(
            input=input,
            num_filters=8,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_1_1")

        conv_1_2 = self._conv_norm_crelu(
            input=conv_1_1,
            num_filters=24,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_1_2")

        # No pool_stride is passed here, so the library default applies.
        pool1 = fluid.layers.pool2d(
            input=conv_1_2,
            pool_size=3,
            pool_padding=1,
            pool_type='avg',
            name="pool_1")

        conv_2_1 = self._conv_norm(
            input=pool1,
            num_filters=48,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_2_1")

        conv_2_2 = self._conv_norm(
            input=conv_2_1,
            num_filters=64,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_2_2")

        conv_inception = conv_2_2

        # three stacked inception modules, indexed 0..2
        for i in range(3):
            conv_inception = self._inceptionA(conv_inception, i)

        layers = []
        layers.append(conv_inception)

        conv_3_1 = self._conv_norm(
            input=conv_inception,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_3_1")

        conv_3_2 = self._conv_norm(
            input=conv_3_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_3_2")

        layers.append(conv_3_2)

        if not self.with_extra_blocks:
            return layers[-1]
        return layers[-2], layers[-1]

    def _original_edition(self, input):
        """
        Build the original network.

        Returns:
            conv_4_2 alone when `with_extra_blocks` is False, otherwise the
            tuple (last inception output, conv_3_2, conv_4_2).
        """
        conv_1 = self._conv_norm_crelu(
            input=input,
            num_filters=24,
            filter_size=7,
            stride=4,
            padding=3,
            act='relu',
            name="conv_1")

        pool_1 = fluid.layers.pool2d(
            input=conv_1,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            name="pool_1")

        conv_2 = self._conv_norm_crelu(
            input=pool_1,
            num_filters=64,
            filter_size=5,
            stride=2,
            padding=2,
            act='relu',
            name="conv_2")

        # NOTE(review): pool_2 reads conv_1, not conv_2, so the pool_1 and
        # conv_2 layers built above never reach any returned feature map.
        # This looks like it was meant to be `input=conv_2`; it is left
        # as-is because changing it alters the channel count and stride
        # seen by every later layer -- confirm against trained weights and
        # the detection head before fixing.
        pool_2 = fluid.layers.pool2d(
            input=conv_1,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            name="pool_2")

        conv_inception = pool_2

        # three stacked inception modules, indexed 0..2
        for i in range(3):
            conv_inception = self._inceptionA(conv_inception, i)

        layers = []
        layers.append(conv_inception)

        conv_3_1 = self._conv_norm(
            input=conv_inception,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_3_1")

        conv_3_2 = self._conv_norm(
            input=conv_3_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_3_2")

        layers.append(conv_3_2)

        conv_4_1 = self._conv_norm(
            input=conv_3_2,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_4_1")

        conv_4_2 = self._conv_norm(
            input=conv_4_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_4_2")

        layers.append(conv_4_2)

        if not self.with_extra_blocks:
            return layers[-1]

        return layers[-3], layers[-2], layers[-1]

    def _conv_norm(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
        """
        Bias-free conv2d followed by batch_norm with activation `act`.

        The conv weight is named `name + "_weights"`, so `name` must be a
        string despite its None default.
        """
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        return fluid.layers.batch_norm(input=conv, act=act)

    def _conv_norm_crelu(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
        """
        Conv + batch_norm, then concatenate the result with its negation.

        The output has 2 * num_filters channels (concat along axis 1).
        `act` is applied inside batch_norm, i.e. before the negation.
        """
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)

        conv_a = fluid.layers.batch_norm(input=conv, act=act)
        # conv_b is conv_a scaled by -1
        conv_b = fluid.layers.scale(conv_a, -1)

        concat = fluid.layers.concat([conv_a, conv_b], axis=1)

        return concat

    def _pooling_block(self,
                       conv,
                       pool_size,
                       pool_stride,
                       pool_padding=0,
                       ceil_mode=True):
        """Max-pool `conv`; not called by any other method of this class."""
        pool = fluid.layers.pool2d(
            input=conv,
            pool_size=pool_size,
            pool_type='max',
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            ceil_mode=ceil_mode)
        return pool

    def _inceptionA(self, data, idx):
        """
        Build one four-branch inception module on `data`.

        Each branch ends in a 32-filter conv and the branches are
        concatenated along axis 1 (4 * 32 = 128 output channels). `idx` is
        only used to make the layer names unique.
        """
        idx = str(idx)

        # branch 1: 3x3 average pool -> 1x1 conv
        pool1 = fluid.layers.pool2d(
            input=data,
            pool_size=3,
            pool_padding=1,
            pool_type='avg',
            name='inceptionA_' + idx + '_pool1')
        conv1 = self._conv_norm(
            input=pool1,
            filter_size=1,
            num_filters=32,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv1')

        # branch 2: 1x1 conv
        conv2 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=32,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv2')

        # branch 3: 1x1 conv -> 3x3 conv
        conv3 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=24,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv3_1')
        conv3 = self._conv_norm(
            input=conv3,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv3_2')

        # branch 4: 1x1 conv -> 3x3 conv -> 3x3 conv
        conv4 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=24,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv4_1')
        conv4 = self._conv_norm(
            input=conv4,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv4_2')
        conv4 = self._conv_norm(
            input=conv4,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv4_3')

        concat = fluid.layers.concat(
            [conv1, conv2, conv3, conv4], axis=1)

        return concat
@register
class FPN(object):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        num_chan (int): number of feature channels
        min_level (int): lowest level of the backbone feature map to use
        max_level (int): highest level of the backbone feature map to use
        spatial_scale (list): feature map scaling factor
        has_extra_convs (bool): whether has extra convolutions in higher levels
        norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel'
        freeze_norm (bool): forwarded to ConvNorm when norm_type is set
    """
    __shared__ = ['norm_type', 'freeze_norm']

    def __init__(self,
                 num_chan=256,
                 min_level=2,
                 max_level=6,
                 spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
                 has_extra_convs=False,
                 norm_type=None,
                 freeze_norm=False):
        self.freeze_norm = freeze_norm
        self.num_chan = num_chan
        self.min_level = min_level
        self.max_level = max_level
        self.spatial_scale = spatial_scale
        self.has_extra_convs = has_extra_convs
        self.norm_type = norm_type

    def _plain_conv(self, feat, filter_size, fan, name, **conv_kwargs):
        """conv2d to num_chan with Xavier weights and a '<name>_b' bias."""
        weight_attr = ParamAttr(
            name=name + "_w", initializer=Xavier(fan_out=fan))
        bias_attr = ParamAttr(
            name=name + "_b", learning_rate=2., regularizer=L2Decay(0.))
        return fluid.layers.conv2d(
            feat,
            self.num_chan,
            filter_size,
            param_attr=weight_attr,
            bias_attr=bias_attr,
            name=name,
            **conv_kwargs)

    def _conv(self, feat, filter_size, fan, name, **conv_kwargs):
        """ConvNorm when norm_type is set, otherwise a plain biased conv2d.

        `conv_kwargs` are only forwarded to the plain conv2d branch.
        """
        if not self.norm_type:
            return self._plain_conv(feat, filter_size, fan, name,
                                    **conv_kwargs)
        return ConvNorm(
            feat,
            self.num_chan,
            filter_size,
            initializer=Xavier(fan_out=fan),
            norm_type=self.norm_type,
            freeze_norm=self.freeze_norm,
            name=name,
            norm_name=name)

    def _add_topdown_lateral(self, body_name, body_input, upper_output):
        """Sum a 1x1 lateral conv of `body_input` with 2x-upsampled `upper_output`."""
        lateral_name = 'fpn_inner_' + body_name + '_lateral'
        topdown_name = 'fpn_topdown_' + body_name
        lateral = self._conv(body_input, 1, body_input.shape[1],
                             lateral_name)
        topdown = fluid.layers.resize_nearest(
            upper_output, scale=2., name=topdown_name)

        return lateral + topdown

    def get_output(self, body_dict):
        """
        Add FPN onto backbone.

        Args:
            body_dict(OrderedDict): Dictionary of variables and each element is the
                output of backbone.

        Return:
            fpn_dict(OrderedDict): A dictionary represents the output of FPN with
                their name.
            spatial_scale(list): A list of multiplicative spatial scale factor.
        """
        # work on a copy so the configured list is never modified
        scales = copy.deepcopy(self.spatial_scale)
        # keys reversed, so index 0 is the last entry of body_dict
        body_names = list(body_dict.keys())[::-1]
        num_stages = len(body_names)
        self.fpn_inner_output = [[] for _ in range(num_stages)]

        # inner (pre-smoothing) maps: 1x1 conv on the first reversed entry,
        # then top-down + lateral sums for the remaining ones
        top_name = body_names[0]
        top_input = body_dict[top_name]
        self.fpn_inner_output[0] = self._conv(
            top_input, 1, top_input.shape[1], 'fpn_inner_' + top_name)
        for i in range(1, num_stages):
            stage_name = body_names[i]
            self.fpn_inner_output[i] = self._add_topdown_lateral(
                stage_name, body_dict[stage_name],
                self.fpn_inner_output[i - 1])

        # 3x3 output conv on every inner map
        fpn_dict = {}
        fpn_names = []
        for stage_name, inner in zip(body_names, self.fpn_inner_output):
            out_name = 'fpn_' + stage_name
            fpn_dict[out_name] = self._conv(
                inner, 3, inner.shape[1] * 3 * 3, out_name, padding=1)
            fpn_names.append(out_name)

        needs_subsample = (not self.has_extra_convs and
                           self.max_level - self.min_level == len(scales))
        if needs_subsample:
            # one extra level: stride-2 max pool of the first output
            sub_name = fpn_names[0] + '_subsampled_2x'
            fpn_dict[sub_name] = fluid.layers.pool2d(
                fpn_dict[fpn_names[0]],
                1,
                'max',
                pool_stride=2,
                name=sub_name)
            fpn_names.insert(0, sub_name)
            scales.insert(0, scales[0] * 0.5)

        # Coarser FPN levels introduced for RetinaNet
        top_level = self.min_level + len(scales) - 1
        if self.has_extra_convs and self.max_level > top_level:
            blob = body_dict[body_names[0]]
            for level in range(top_level + 1, self.max_level + 1):
                level_name = 'fpn_' + str(level)
                # every extra level after the first takes a relu'd input
                if level > top_level + 1:
                    blob_in = fluid.layers.relu(blob)
                else:
                    blob_in = blob
                blob = self._plain_conv(
                    blob_in,
                    3,
                    blob_in.shape[1] * 3 * 3,
                    level_name,
                    stride=2,
                    padding=1)
                fpn_dict[level_name] = blob
                fpn_names.insert(0, level_name)
                scales.insert(0, scales[0] * 0.5)

        ordered = OrderedDict()
        for key in fpn_names:
            ordered[key] = fpn_dict[key]
        return ordered, scales
@register
class MobileNet(object):
    """
    MobileNet v1, see https://arxiv.org/abs/1704.04861

    Args:
        norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
        norm_decay (float): weight decay for normalization layer weights
        conv_group_scale (int): scaling factor for convolution groups
        conv_learning_rate (float): learning rate set on every conv weight
        with_extra_blocks (bool): if extra blocks should be added
        extra_block_filters (list): number of filter for each extra block;
            the first four [filters1, filters2] entries are used
        weight_prefix_name (str): prefix prepended to every layer name
    """
    __shared__ = ['norm_type', 'weight_prefix_name']

    # NOTE: the list default below is only read, never mutated.
    def __init__(self,
                 norm_type='bn',
                 norm_decay=0.,
                 conv_group_scale=1,
                 conv_learning_rate=1.0,
                 with_extra_blocks=False,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]],
                 weight_prefix_name=''):
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.conv_group_scale = conv_group_scale
        self.conv_learning_rate = conv_learning_rate
        self.with_extra_blocks = with_extra_blocks
        self.extra_block_filters = extra_block_filters
        self.prefix_name = weight_prefix_name

    def _conv_norm(self,
                   input,
                   filter_size,
                   num_filters,
                   stride,
                   padding,
                   num_groups=1,
                   act='relu',
                   use_cudnn=True,
                   name=None):
        """
        Bias-free conv2d followed by batch_norm with activation `act`.

        All parameters are named from `name` ('<name>_weights',
        '<name>_bn_scale', ...), so `name` must be a string despite its
        None default.
        """
        parameter_attr = ParamAttr(
            learning_rate=self.conv_learning_rate,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)

        bn_name = name + "_bn"
        norm_decay = self.norm_decay
        bn_param_attr = ParamAttr(
            regularizer=L2Decay(norm_decay), name=bn_name + '_scale')
        bn_bias_attr = ParamAttr(
            regularizer=L2Decay(norm_decay), name=bn_name + '_offset')
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def depthwise_separable(self,
                            input,
                            num_filters1,
                            num_filters2,
                            num_groups,
                            stride,
                            scale,
                            name=None):
        """
        3x3 grouped conv ('<name>_dw') followed by a 1x1 conv ('<name>_sep').

        `scale` multiplies both filter counts and the group count; `stride`
        applies to the 3x3 conv only.
        """
        depthwise_conv = self._conv_norm(
            input=input,
            filter_size=3,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False,
            name=name + "_dw")

        pointwise_conv = self._conv_norm(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0,
            name=name + "_sep")
        return pointwise_conv

    def _extra_block(self,
                     input,
                     num_filters1,
                     num_filters2,
                     num_groups,
                     stride,
                     name=None):
        """
        1x1 conv ('<name>_extra1') followed by a 3x3 conv ('<name>_extra2').

        `stride` applies to the 3x3 conv. It used to be accepted but
        ignored (the 3x3 conv was hard-coded to stride 2); every call in
        this class passes 2, so honoring it changes nothing for them.
        """
        pointwise_conv = self._conv_norm(
            input=input,
            filter_size=1,
            num_filters=int(num_filters1),
            stride=1,
            num_groups=int(num_groups),
            padding=0,
            name=name + "_extra1")
        normal_conv = self._conv_norm(
            input=pointwise_conv,
            filter_size=3,
            num_filters=int(num_filters2),
            stride=stride,
            num_groups=int(num_groups),
            padding=1,
            name=name + "_extra2")
        return normal_conv

    def __call__(self, input):
        """
        Build the backbone on top of `input`.

        Returns:
            Without extra blocks: a list of three feature maps (the ones
            marked 1/8, 1/16 and 1/32 below).
            With extra blocks: a 6-tuple made of the output of conv5_5, the
            output of conv6 and the four extra-block outputs.
        """
        scale = self.conv_group_scale

        blocks = []
        # input 1/1
        out = self._conv_norm(
            input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1")
        # 1/2
        out = self.depthwise_separable(
            out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1")
        out = self.depthwise_separable(
            out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2")
        # 1/4
        out = self.depthwise_separable(
            out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1")
        out = self.depthwise_separable(
            out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2")
        # 1/8
        blocks.append(out)
        out = self.depthwise_separable(
            out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1")
        out = self.depthwise_separable(
            out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2")
        # 1/16
        blocks.append(out)
        for i in range(5):
            out = self.depthwise_separable(
                out,
                512,
                512,
                512,
                1,
                scale,
                name=self.prefix_name + "conv5_" + str(i + 1))
        module11 = out

        out = self.depthwise_separable(
            out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6")
        # 1/32
        out = self.depthwise_separable(
            out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6")
        module13 = out
        blocks.append(out)
        if not self.with_extra_blocks:
            return blocks

        # Four chained extra blocks named conv7_1 .. conv7_4, each with
        # groups=1 and stride=2.
        num_filters = self.extra_block_filters
        extras = []
        feat = module13
        for i in range(4):
            feat = self._extra_block(feat, num_filters[i][0],
                                     num_filters[i][1], 1, 2,
                                     self.prefix_name + "conv7_" + str(i + 1))
            extras.append(feat)
        return (module11, module13) + tuple(extras)
class NameAdapter(object):
    """Map backbone layer names onto the names used by pretrained weights.

    The mapping depends on the wrapped model's `_model_type` attribute;
    'SEResNeXt' and 'ResNeXt' get special treatment.
    """

    def __init__(self, model):
        super(NameAdapter, self).__init__()
        self.model = model

    @property
    def model_type(self):
        """The model's `_model_type`, or '' when it has none."""
        return getattr(self.model, '_model_type', '')

    @property
    def variant(self):
        """The model's `variant`, or '' when it has none."""
        return getattr(self.model, 'variant', '')

    def fix_conv_norm_name(self, name):
        """Return the norm-layer name that goes with conv layer `name`."""
        # the naming rule is same as pretrained weight
        if self.model_type == 'SEResNeXt':
            return name + "_bn"
        if name == "conv1":
            return "bn_" + name
        # drop the first three characters and prefix "bn"
        return "bn" + name[3:]

    def fix_shortcut_name(self, name):
        """Return the shortcut conv name; only SEResNeXt rewrites it."""
        if self.model_type != 'SEResNeXt':
            return name
        return 'conv' + name + '_prj'

    def fix_bottleneck_name(self, name):
        """Return (conv1, conv2, conv3, shortcut) names for a bottleneck."""
        if self.model_type == 'SEResNeXt':
            stem = 'conv' + name
            return stem + '_x1', stem + '_x2', stem + '_x3', name
        suffixes = ("_branch2a", "_branch2b", "_branch2c", "_branch1")
        return tuple(name + suffix for suffix in suffixes)

    def fix_layer_warp_name(self, stage_num, count, i):
        """Return the name of block `i` in stage `stage_num` (`count` blocks)."""
        prefix = 'res' + str(stage_num)
        numbered = count > 10 and stage_num == 4
        if not numbered:
            # letters: res2a, res2b, ...
            conv_name = prefix + chr(ord("a") + i)
        elif i == 0:
            conv_name = prefix + "a"
        else:
            # res4b1, res4b2, ...
            conv_name = prefix + "b" + str(i)
        if self.model_type == 'SEResNeXt':
            conv_name = str(stage_num + 2) + '_' + str(i + 1)
        return conv_name

    def fix_c1_stage_name(self):
        """Return the name of the first conv stage."""
        if self.model_type == 'ResNeXt':
            return "res_conv1"
        return "conv1"
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from ppdet.core.workspace import register, serializable -from numbers import Integral - -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -@register -@serializable -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 18, 34, 50, 101, 152. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[2, 3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name=''): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [18, 34, 50, 101, 152], \ - "depth {} not in [18, 34, 50, 101, 152]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - 
self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 18: ([2, 2, 2, 2], self.basicblock), - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - 101: ([3, 4, 23, 3], self.bottleneck), - 152: ([3, 8, 36, 3], self.bottleneck) - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr( - initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr( - initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - 
offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 
and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [ - [int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3] - ] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - 
input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for 
(c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -@register -@serializable -class ResNetC5(ResNet): - __doc__ = ResNet.__doc__ - - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/resnext.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/resnext.py deleted file mode 100644 index 5452511376de21ccca731b0f4e08b38552fb4e9d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/resnext.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ppdet.core.workspace import register, serializable -from .resnet import ResNet - -__all__ = ['ResNeXt'] - - -@register -@serializable -class ResNeXt(ResNet): - """ - ResNeXt, see https://arxiv.org/abs/1611.05431 - Args: - depth (int): network depth, should be 50, 101, 152. - groups (int): group convolution cardinality - group_width (int): width of each group convolution - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of the stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - """ - - def __init__(self, - depth=50, - groups=64, - group_width=4, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=True, - variant='a', - feature_maps=[2, 3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name=''): - assert depth in [50, 101, 152], "depth {} should be 50, 101 or 152" - super(ResNeXt, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.depth_cfg = { - 50: ([3, 4, 6, 3], self.bottleneck), - 101: ([3, 4, 23, 3], self.bottleneck), - 152: ([3, 8, 36, 3], self.bottleneck) - } - self.stage_filters = [256, 512, 1024, 2048] - 
self.groups = groups - self.group_width = group_width - self._model_type = 'ResNeXt' - self.dcn_v2_stages = dcn_v2_stages - - -@register -@serializable -class ResNeXtC5(ResNeXt): - __doc__ = ResNeXt.__doc__ - - def __init__(self, - depth=50, - groups=64, - group_width=4, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=True, - variant='a', - feature_maps=[5], - weight_prefix_name=''): - super(ResNeXtC5, self).__init__(depth, groups, group_width, freeze_at, - norm_type, freeze_norm, norm_decay, - variant, feature_maps) - self.severed_head = True diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/senet.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/senet.py deleted file mode 100644 index 09c69ff199f03837b979127664d7e752db7e587a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/senet.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -from ppdet.core.workspace import register, serializable -from .resnext import ResNeXt - -__all__ = ['SENet', 'SENetC5'] - - -@register -@serializable -class SENet(ResNeXt): - """ - Squeeze-and-Excitation Networks, see https://arxiv.org/abs/1709.01507 - Args: - depth (int): SENet depth, should be 50, 101, 152 - groups (int): group convolution cardinality - group_width (int): width of each group convolution - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn', 'sync_bn' or 'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of the stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - """ - - def __init__(self, - depth=50, - groups=64, - group_width=4, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='d', - feature_maps=[2, 3, 4, 5], - dcn_v2_stages=[], - std_senet=False, - weight_prefix_name=''): - super(SENet, self).__init__(depth, groups, group_width, freeze_at, - norm_type, freeze_norm, norm_decay, variant, - feature_maps) - if depth < 152: - self.stage_filters = [128, 256, 512, 1024] - else: - self.stage_filters = [256, 512, 1024, 2048] - self.reduction_ratio = 16 - self.std_senet = std_senet - self._c1_out_chan_num = 128 - self._model_type = 'SEResNeXt' - self.dcn_v2_stages = dcn_v2_stages - - def _squeeze_excitation(self, input, num_channels, name=None): - pool = fluid.layers.pool2d( - input=input, - pool_size=0, - pool_type='avg', - global_pooling=True, - use_cudnn=False) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = 
fluid.layers.fc( - input=pool, - size=int(num_channels / self.reduction_ratio), - act='relu', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_sqz_weights'), - bias_attr=ParamAttr(name=name + '_sqz_offset')) - stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc( - input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name=name + '_exc_weights'), - bias_attr=ParamAttr(name=name + '_exc_offset')) - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale - - -@register -@serializable -class SENetC5(SENet): - __doc__ = SENet.__doc__ - - def __init__(self, - depth=50, - groups=64, - group_width=4, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='d', - feature_maps=[5], - weight_prefix_name=''): - super(SENetC5, self).__init__(depth, groups, group_width, freeze_at, - norm_type, freeze_norm, norm_decay, - variant, feature_maps) - self.severed_head = True diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/vgg.py b/PaddleCV/PaddleDetection/ppdet/modeling/backbones/vgg.py deleted file mode 100644 index 28bd29272773dd670e0ef7223814a25784c54bf1..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/backbones/vgg.py +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -from ppdet.core.workspace import register - -__all__ = ['VGG'] - - -@register -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1. - with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]]): - assert depth in [16, 19], \ - "depth {} not in [16, 19]" - - self.depth = depth - self.depth_cfg = { - 16: [2, 2, 3, 3, 3], - 19: [2, 2, 4, 4, 4] - } - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block(conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - 
else: - conv = self._pooling_block(conv, 2, 2) - - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block(conv, v[0], v[1], - v[2], v[3], v[4], name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - padding_size, - stride_size, - filter_size, - name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, - num_filters=int(num_filters1), - filter_size=1, - stride=1, - act='relu', - padding=0, - name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + "_biases"), - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, - conv, - pool_size, - pool_stride, - pool_padding=0, - ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - 
pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize( - input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, - shape=shape, - dtype=input.dtype, - default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, y=scale, axis=-1 if channel_shared else 1, - name="conv4_3_norm_scale") - return out diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py b/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py deleted file mode 100644 index 6b407cfbd82d1588cbc020b3e8bf5e7d5f4b930c..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/model_input.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -from collections import OrderedDict -from ppdet.data.transform.operators import * - -from paddle import fluid - -__all__ = ['create_feed'] - -# yapf: disable -feed_var_def = [ - {'name': 'im_info', 'shape': [3], 'dtype': 'float32', 'lod_level': 0}, - {'name': 'im_id', 'shape': [1], 'dtype': 'int32', 'lod_level': 0}, - {'name': 'gt_box', 'shape': [4], 'dtype': 'float32', 'lod_level': 1}, - {'name': 'gt_label', 'shape': [1], 'dtype': 'int32', 'lod_level': 1}, - {'name': 'is_crowd', 'shape': [1], 'dtype': 'int32', 'lod_level': 1}, - {'name': 'gt_mask', 'shape': [2], 'dtype': 'float32', 'lod_level': 3}, - {'name': 'is_difficult', 'shape': [1], 'dtype': 'int32', 'lod_level': 1}, - {'name': 'gt_score', 'shape': [1], 'dtype': 'float32', 'lod_level': 0}, - {'name': 'im_shape', 'shape': [3], 'dtype': 'float32', 'lod_level': 0}, - {'name': 'im_size', 'shape': [2], 'dtype': 'int32', 'lod_level': 0}, -] -# yapf: enable - - -def create_feed(feed, use_pyreader=True, sub_prog_feed=False): - image_shape = feed.image_shape - feed_var_map = {var['name']: var for var in feed_var_def} - feed_var_map['image'] = { - 'name': 'image', - 'shape': image_shape, - 'dtype': 'float32', - 'lod_level': 0 - } - - # tensor padding with 0 is used instead of LoD tensor when - # num_max_boxes is set - if getattr(feed, 'num_max_boxes', None) is not None: - feed_var_map['gt_label']['shape'] = [feed.num_max_boxes] - feed_var_map['gt_score']['shape'] = [feed.num_max_boxes] - feed_var_map['gt_box']['shape'] = [feed.num_max_boxes, 4] - feed_var_map['is_difficult']['shape'] = [feed.num_max_boxes] - feed_var_map['gt_label']['lod_level'] = 0 - feed_var_map['gt_score']['lod_level'] = 0 - feed_var_map['gt_box']['lod_level'] = 0 - feed_var_map['is_difficult']['lod_level'] = 0 - - base_name_list = ['image'] - num_scale = getattr(feed, 'num_scale', 1) - sample_transform = feed.sample_transforms - 
multiscale_test = False - aug_flip = False - for t in sample_transform: - if isinstance(t, MultiscaleTestResize): - multiscale_test = True - aug_flip = t.use_flip - assert (len(t.target_size)+1)*(aug_flip+1) == num_scale, \ - "num_scale: {} is not equal to the actual number of scale: {}."\ - .format(num_scale, (len(t.target_size)+1)*(aug_flip+1)) - break - - if aug_flip: - num_scale //= 2 - base_name_list.insert(0, 'flip_image') - feed_var_map['flip_image'] = { - 'name': 'flip_image', - 'shape': image_shape, - 'dtype': 'float32', - 'lod_level': 0 - } - - image_name_list = [] - if multiscale_test: - for base_name in base_name_list: - for i in range(0, num_scale): - name = base_name if i == 0 else base_name + '_scale_' + str(i - - 1) - feed_var_map[name] = { - 'name': name, - 'shape': image_shape, - 'dtype': 'float32', - 'lod_level': 0 - } - image_name_list.append(name) - feed_var_map['im_info']['shape'] = [feed.num_scale * 3] - feed.fields = image_name_list + feed.fields[1:] - if sub_prog_feed: - box_names = ['bbox', 'bbox_flip'] - for box_name in box_names: - sub_prog_feed = { - 'name': box_name, - 'shape': [6], - 'dtype': 'float32', - 'lod_level': 1 - } - - feed.fields = feed.fields + [box_name] - feed_var_map[box_name] = sub_prog_feed - - feed_vars = OrderedDict([(key, fluid.layers.data( - name=feed_var_map[key]['name'], - shape=feed_var_map[key]['shape'], - dtype=feed_var_map[key]['dtype'], - lod_level=feed_var_map[key]['lod_level'])) for key in feed.fields]) - - pyreader = None - if use_pyreader: - pyreader = fluid.io.PyReader( - feed_list=list(feed_vars.values()), - capacity=64, - use_double_buffer=True, - iterable=False) - return pyreader, feed_vars diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/ops.py b/PaddleCV/PaddleDetection/ppdet/modeling/ops.py deleted file mode 100644 index 1312f86b49077536043ceb96cbca0f6a1c9b9ef3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/ops.py +++ /dev/null @@ -1,349 +0,0 @@ -# 
Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay -from ppdet.core.workspace import register, serializable - -__all__ = [ - 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'MultiClassNMS', - 'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool', 'MultiBoxHead', - 'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm' -] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - - norm_lr = 0. if freeze_norm else 1. 
- pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -@register -@serializable -class AnchorGenerator(object): - __op__ = fluid.layers.anchor_generator - __append_doc__ = True - - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -@register -@serializable -class RPNTargetAssign(object): - __op__ = fluid.layers.rpn_target_assign - 
__append_doc__ = True - - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -@register -@serializable -class GenerateProposals(object): - __op__ = fluid.layers.generate_proposals - __append_doc__ = True - - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -@register -class MaskAssigner(object): - __op__ = fluid.layers.generate_mask_labels - __append_doc__ = True - __shared__ = ['num_classes'] - - def __init__(self, num_classes=81, resolution=14): - super(MaskAssigner, self).__init__() - self.num_classes = num_classes - self.resolution = resolution - - -@register -@serializable -class MultiClassNMS(object): - __op__ = fluid.layers.multiclass_nms - __append_doc__ = True - - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -@register -class BBoxAssigner(object): - __op__ = fluid.layers.generate_proposal_labels - __append_doc__ = True - __shared__ = ['num_classes'] - - def __init__(self, - 
batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=.5, - bg_thresh_hi=.5, - bg_thresh_lo=0., - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - num_classes=81, - shuffle_before_sample=True): - super(BBoxAssigner, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.class_nums = num_classes - self.use_random = shuffle_before_sample - - -@register -class RoIAlign(object): - __op__ = fluid.layers.roi_align - __append_doc__ = True - - def __init__(self, resolution=7, spatial_scale=1. / 16, sampling_ratio=0): - super(RoIAlign, self).__init__() - if isinstance(resolution, Integral): - resolution = [resolution, resolution] - self.pooled_height = resolution[0] - self.pooled_width = resolution[1] - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - - -@register -class RoIPool(object): - __op__ = fluid.layers.roi_pool - __append_doc__ = True - - def __init__(self, resolution=7, spatial_scale=1. 
/ 16): - super(RoIPool, self).__init__() - if isinstance(resolution, Integral): - resolution = [resolution, resolution] - self.pooled_height = resolution[0] - self.pooled_width = resolution[1] - self.spatial_scale = spatial_scale - - -@register -class MultiBoxHead(object): - __op__ = fluid.layers.multi_box_head - __append_doc__ = True - - def __init__(self, - min_ratio=20, - max_ratio=90, - base_size=300, - min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0], - max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0], - aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], - [2., 3.]], - steps=None, - offset=0.5, - flip=True, - min_max_aspect_ratios_order=False, - kernel_size=1, - pad=0): - super(MultiBoxHead, self).__init__() - self.min_ratio = min_ratio - self.max_ratio = max_ratio - self.base_size = base_size - self.min_sizes = min_sizes - self.max_sizes = max_sizes - self.aspect_ratios = aspect_ratios - self.steps = steps - self.offset = offset - self.flip = flip - self.min_max_aspect_ratios_order = min_max_aspect_ratios_order - self.kernel_size = kernel_size - self.pad = pad - - -@register -@serializable -class SSDOutputDecoder(object): - __op__ = fluid.layers.detection_output - __append_doc__ = True - - def __init__(self, - nms_threshold=0.45, - nms_top_k=400, - keep_top_k=200, - score_threshold=0.01, - nms_eta=1.0, - background_label=0): - super(SSDOutputDecoder, self).__init__() - self.nms_threshold = nms_threshold - self.background_label = background_label - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.score_threshold = score_threshold - self.nms_eta = nms_eta - - -@register -@serializable -class RetinaTargetAssign(object): - __op__ = fluid.layers.retinanet_target_assign - __append_doc__ = True - - def __init__(self, positive_overlap=0.5, negative_overlap=0.4): - super(RetinaTargetAssign, self).__init__() - self.positive_overlap = positive_overlap - self.negative_overlap = negative_overlap - - -@register -@serializable -class 
RetinaOutputDecoder(object): - __op__ = fluid.layers.retinanet_detection_output - __append_doc__ = True - - def __init__(self, - score_thresh=0.05, - nms_thresh=0.3, - pre_nms_top_n=1000, - detections_per_im=100, - nms_eta=1.0): - super(RetinaOutputDecoder, self).__init__() - self.score_threshold = score_thresh - self.nms_threshold = nms_thresh - self.nms_top_k = pre_nms_top_n - self.keep_top_k = detections_per_im - self.nms_eta = nms_eta diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/__init__.py deleted file mode 100644 index 15d2525db8c8dd2f72c09641ced94a0c0864b2a0..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -from . import roi_extractor -from .roi_extractor import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/roi_extractor.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/roi_extractor.py deleted file mode 100644 index 1caf3936f584bc0eb116d32a7e38559a917afe85..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_extractors/roi_extractor.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid - -from ppdet.core.workspace import register -from ppdet.modeling.ops import RoIAlign, RoIPool - -__all__ = ['RoIPool', 'RoIAlign', 'FPNRoIAlign'] - - -@register -class FPNRoIAlign(object): - """ - RoI align pooling for FPN feature maps - Args: - sampling_ratio (int): number of sampling points - min_level (int): lowest level of FPN layer - max_level (int): highest level of FPN layer - canconical_level (int): the canconical FPN feature map level - canonical_size (int): the canconical FPN feature map size - box_resolution (int): box resolution - mask_resolution (int): mask roi resolution - """ - - def __init__(self, - sampling_ratio=0, - min_level=2, - max_level=5, - canconical_level=4, - canonical_size=224, - box_resolution=7, - mask_resolution=14): - super(FPNRoIAlign, self).__init__() - self.sampling_ratio = sampling_ratio - self.min_level = min_level - self.max_level = max_level - self.canconical_level = canconical_level - self.canonical_size = canonical_size - self.box_resolution = box_resolution - self.mask_resolution = mask_resolution - - def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): - """ - Adopt RoI align onto several level of feature maps to get RoI features. 
- Distribute RoIs to different levels by area and get a list of RoI - features by distributed RoIs and their corresponding feature maps. - - Returns: - roi_feat(Variable): RoI features with shape of [M, C, R, R], - where M is the number of RoIs and R is RoI resolution - - """ - k_min = self.min_level - k_max = self.max_level - num_roi_lvls = k_max - k_min + 1 - name_list = list(head_inputs.keys()) - input_name_list = name_list[-num_roi_lvls:] - spatial_scale = spatial_scale[-num_roi_lvls:] - rois_dist, restore_index = fluid.layers.distribute_fpn_proposals( - rois, k_min, k_max, self.canconical_level, self.canonical_size) - # rois_dist is in ascend order - roi_out_list = [] - resolution = is_mask and self.mask_resolution or self.box_resolution - for lvl in range(num_roi_lvls): - name_index = num_roi_lvls - lvl - 1 - rois_input = rois_dist[lvl] - head_input = head_inputs[input_name_list[name_index]] - sc = spatial_scale[name_index] - roi_out = fluid.layers.roi_align( - input=head_input, - rois=rois_input, - pooled_height=resolution, - pooled_width=resolution, - spatial_scale=sc, - sampling_ratio=self.sampling_ratio) - roi_out_list.append(roi_out) - roi_feat_shuffle = fluid.layers.concat(roi_out_list) - roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) - roi_feat = fluid.layers.lod_reset(roi_feat_, rois) - - return roi_feat diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/__init__.py deleted file mode 100644 index 345a0eb3e30d69af15b8a5f0b4766e8693462e5d..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import - -from . import bbox_head -from . import mask_head -from . import cascade_head - -from .bbox_head import * -from .mask_head import * -from .cascade_head import * diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py deleted file mode 100644 index 314aeb6087e05aa1c33d26b6b838075523ff2b10..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py +++ /dev/null @@ -1,319 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - -from ppdet.modeling.ops import MultiClassNMS -from ppdet.modeling.ops import ConvNorm -from ppdet.core.workspace import register, serializable -from ppdet.experimental import mixed_precision_global_state - -__all__ = ['BBoxHead', 'TwoFCHead', 'XConvNormHead'] - - -@register -@serializable -class BoxCoder(object): - __op__ = fluid.layers.box_coder - __append_doc__ = True - - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -@register -class XConvNormHead(object): - """ - RCNN head with serveral convolution layers - - Args: - conv_num (int): num of convolution layers for the rcnn head - conv_dim (int): num of filters for the conv layers - mlp_dim (int): num of filters for the fc layers - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_conv=4, - conv_dim=256, - mlp_dim=1024, - norm_type=None, - freeze_norm=False): - super(XConvNormHead, self).__init__() - self.conv_dim = conv_dim - self.mlp_dim = mlp_dim - self.num_conv = num_conv - self.norm_type = norm_type - self.freeze_norm = freeze_norm - - def __call__(self, roi_feat): - conv = roi_feat - fan = self.conv_dim * 3 * 3 - initializer = MSRA(uniform=False, fan_in=fan) - for i in range(self.num_conv): - name = 'bbox_head_conv' + str(i) - conv = ConvNorm( - conv, - self.conv_dim, - 3, - act='relu', - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - 
name=name, - norm_name=name) - fan = conv.shape[1] * conv.shape[2] * conv.shape[3] - head_heat = fluid.layers.fc(input=conv, - size=self.mlp_dim, - act='relu', - name='fc6' + name, - param_attr=ParamAttr( - name='fc6%s_w' % name, - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6%s_b' % name, - learning_rate=2, - regularizer=L2Decay(0.))) - return head_heat - - -@register -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - mixed_precision_enabled = mixed_precision_global_state() is not None - - if mixed_precision_enabled: - roi_feat = fluid.layers.cast(roi_feat, 'float16') - - fc6 = fluid.layers.fc(input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr( - name='fc6_w', - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', - learning_rate=2., - regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc(input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr( - name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - if mixed_precision_enabled: - head_feat = fluid.layers.cast(head_feat, 'float32') - - return head_feat - - -@register -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder().__dict__, - nms=MultiClassNMS().__dict__, - num_classes=81): - super(BBoxHead, self).__init__() - 
self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - if isinstance(box_coder, dict): - self.box_coder = BoxCoder(**box_coder) - if isinstance(nms, dict): - self.nms = MultiClassNMS(**nms) - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. - """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead) and not isinstance( - self.head, XConvNormHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc(input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', - initializer=Normal( - loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', - learning_rate=2., - regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc(input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', - initializer=Normal( - loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. 
- P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = fluid.layers.smooth_l1( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights, - sigma=1.0) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - decoded_box = self.box_coder(prior_box=boxes, target_box=bbox_pred) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - pred_result = self.nms(bboxes=cliped_box, scores=cls_prob) - return {'bbox': pred_result} diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py deleted file mode 100644 index d36ff4c7541d8825e9491e696f787e10ef95b97e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py +++ /dev/null @@ -1,321 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - -from ppdet.modeling.ops import MultiClassNMS -from ppdet.modeling.ops import ConvNorm -from ppdet.core.workspace import register - -__all__ = ['CascadeBBoxHead'] - - -@register -class CascadeBBoxHead(object): - """ - Cascade RCNN bbox head - - Args: - head (object): the head module instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'nms'] - __shared__ = ['num_classes'] - - def __init__(self, head, nms=MultiClassNMS().__dict__, num_classes=81): - super(CascadeBBoxHead, self).__init__() - self.head = head - self.nms = nms - self.num_classes = num_classes - if isinstance(nms, dict): - self.nms = MultiClassNMS(**nms) - - def get_output(self, - roi_feat, - cls_agnostic_bbox_reg=2, - wb_scalar=1.0, - name=''): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - cls_agnostic_bbox_reg(Int): BBox regressor are class agnostic. - wb_scalar(Float): Weights and Bias's learning rate. - name(String): Layer's name - - Returns: - cls_score(Variable): cls score. - bbox_pred(Variable): bbox regression. 
- """ - head_feat = self.head(roi_feat, wb_scalar, name) - cls_score = fluid.layers.fc(input=head_feat, - size=self.num_classes, - act=None, - name='cls_score' + name, - param_attr=ParamAttr( - name='cls_score%s_w' % name, - initializer=Normal( - loc=0.0, scale=0.01), - learning_rate=wb_scalar), - bias_attr=ParamAttr( - name='cls_score%s_b' % name, - learning_rate=wb_scalar * 2, - regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc(input=head_feat, - size=4 * cls_agnostic_bbox_reg, - act=None, - name='bbox_pred' + name, - param_attr=ParamAttr( - name='bbox_pred%s_w' % name, - initializer=Normal( - loc=0.0, scale=0.001), - learning_rate=wb_scalar), - bias_attr=ParamAttr( - name='bbox_pred%s_b' % name, - learning_rate=wb_scalar * 2, - regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, rcnn_pred_list, rcnn_target_list, rcnn_loss_weight_list): - """ - Get bbox_head loss. - - Args: - rcnn_pred_list(List): Cascade RCNN's head's output including - bbox_pred and cls_score - rcnn_target_list(List): Cascade rcnn's bbox and label target - rcnn_loss_weight_list(List): The weight of location and class loss - - Return: - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. 
- """ - loss_dict = {} - for i, (rcnn_pred, rcnn_target - ) in enumerate(zip(rcnn_pred_list, rcnn_target_list)): - labels_int64 = fluid.layers.cast(x=rcnn_target[1], dtype='int64') - labels_int64.stop_gradient = True - - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=rcnn_pred[0], - label=labels_int64, - numeric_stable_mode=True, ) - loss_cls = fluid.layers.reduce_mean( - loss_cls, name='loss_cls_' + str(i)) * rcnn_loss_weight_list[i] - - loss_bbox = fluid.layers.smooth_l1( - x=rcnn_pred[1], - y=rcnn_target[2], - inside_weight=rcnn_target[3], - outside_weight=rcnn_target[4], - sigma=1.0, # detectron use delta = 1./sigma**2 - ) - loss_bbox = fluid.layers.reduce_mean( - loss_bbox, - name='loss_bbox_' + str(i)) * rcnn_loss_weight_list[i] - - loss_dict['loss_cls_%d' % i] = loss_cls - loss_dict['loss_loc_%d' % i] = loss_bbox - - return loss_dict - - def get_prediction(self, - im_info, - im_shape, - roi_feat_list, - rcnn_pred_list, - proposal_list, - cascade_bbox_reg_weights, - cls_agnostic_bbox_reg=2, - return_box_score=False): - """ - Get prediction bounding box in test stage. - : - Args: - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists - of im_height, im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - rois_feat_list (List): RoI feature from RoIExtractor. - rcnn_pred_list (Variable): Cascade rcnn's head's output - including bbox_pred and cls_score - proposal_list (List): RPN proposal boxes. - cascade_bbox_reg_weights (List): BBox decode var. - cls_agnostic_bbox_reg(Int): BBox regressor are class agnostic - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - self.im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - boxes_cls_prob_l = [] - - rcnn_pred = rcnn_pred_list[-1] # stage 3 - repreat_num = 1 - repreat_num = 3 - bbox_reg_w = cascade_bbox_reg_weights[-1] - for i in range(repreat_num): - # cls score - if i < 2: - cls_score, _ = self.get_output( - roi_feat_list[-1], # roi_feat_3 - name='_' + str(i + 1) if i > 0 else '') - else: - cls_score = rcnn_pred[0] - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - boxes_cls_prob_l.append(cls_prob) - - boxes_cls_prob_mean = ( - boxes_cls_prob_l[0] + boxes_cls_prob_l[1] + boxes_cls_prob_l[2] - ) / 3.0 - - # bbox pred - proposals_boxes = proposal_list[-1] - im_scale_lod = fluid.layers.sequence_expand(self.im_scale, - proposals_boxes) - proposals_boxes = proposals_boxes / im_scale_lod - bbox_pred = rcnn_pred[1] - bbox_pred_new = fluid.layers.reshape(bbox_pred, - (-1, cls_agnostic_bbox_reg, 4)) - if cls_agnostic_bbox_reg == 2: - # only use fg box delta to decode box - bbox_pred_new = fluid.layers.slice( - bbox_pred_new, axes=[1], starts=[1], ends=[2]) - bbox_pred_new = fluid.layers.expand(bbox_pred_new, - [1, self.num_classes, 1]) - decoded_box = fluid.layers.box_coder( - prior_box=proposals_boxes, - prior_box_var=bbox_reg_w, - target_box=bbox_pred_new, - code_type='decode_center_size', - box_normalized=False, - axis=1) - - box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': box_out, 'score': boxes_cls_prob_mean} - pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) - return {"bbox": pred_result} - - -@register -class CascadeXConvNormHead(object): - """ - RCNN head with serveral convolution layers - - Args: - conv_num (int): num of convolution layers for the rcnn head - conv_dim (int): num of filters for the conv layers - mlp_dim (int): num of filters for the fc layers - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_conv=4, - conv_dim=256, - 
mlp_dim=1024, - norm_type=None, - freeze_norm=False): - super(CascadeXConvNormHead, self).__init__() - self.conv_dim = conv_dim - self.mlp_dim = mlp_dim - self.num_conv = num_conv - self.norm_type = norm_type - self.freeze_norm = freeze_norm - - def __call__(self, roi_feat, wb_scalar=1.0, name=''): - conv = roi_feat - fan = self.conv_dim * 3 * 3 - initializer = MSRA(uniform=False, fan_in=fan) - for i in range(self.num_conv): - name = 'bbox_head_conv' + str(i) - conv = ConvNorm( - conv, - self.conv_dim, - 3, - act='relu', - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - lr_scale=wb_scalar, - name=name, - norm_name=name) - fan = conv.shape[1] * conv.shape[2] * conv.shape[3] - head_heat = fluid.layers.fc(input=conv, - size=self.mlp_dim, - act='relu', - name='fc6' + name, - param_attr=ParamAttr( - name='fc6%s_w' % name, - initializer=Xavier(fan_out=fan), - learning_rate=wb_scalar), - bias_attr=ParamAttr( - name='fc6%s_b' % name, - regularizer=L2Decay(0.), - learning_rate=wb_scalar * 2)) - return head_heat - - -@register -class CascadeTwoFCHead(object): - """ - RCNN head with serveral convolution layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim): - super(CascadeTwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat, wb_scalar=1.0, name=''): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - fc6 = fluid.layers.fc(input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6' + name, - param_attr=ParamAttr( - name='fc6%s_w' % name, - initializer=Xavier(fan_out=fan), - learning_rate=wb_scalar), - bias_attr=ParamAttr( - name='fc6%s_b' % name, - learning_rate=wb_scalar * 2, - regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc(input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7' + name, - param_attr=ParamAttr( - name='fc7%s_w' % name, - initializer=Xavier(), - learning_rate=wb_scalar), - bias_attr=ParamAttr( - name='fc7%s_b' 
% name, - learning_rate=wb_scalar * 2, - regularizer=L2Decay(0.))) - return head_feat diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/mask_head.py b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/mask_head.py deleted file mode 100644 index f61add0402664113de83e5537acae459260481ee..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/mask_head.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import MSRA -from paddle.fluid.regularizer import L2Decay - -from ppdet.core.workspace import register -from ppdet.modeling.ops import ConvNorm - -__all__ = ['MaskHead'] - - -@register -class MaskHead(object): - """ - RCNN mask head - Args: - num_convs (int): num of convolutions, 4 for FPN, 1 otherwise - conv_dim (int): num of channels after first convolution - resolution (int): size of the output mask - dilation (int): dilation rate - num_classes (int): number of output classes - """ - - __shared__ = ['num_classes'] - - def __init__(self, - num_convs=0, - conv_dim=256, - resolution=14, - dilation=1, - num_classes=81, - norm_type=None): - super(MaskHead, self).__init__() - self.num_convs = num_convs - self.conv_dim = conv_dim - self.resolution = resolution - self.dilation = dilation - self.num_classes = num_classes - self.norm_type = norm_type - - def _mask_conv_head(self, roi_feat, num_convs, norm_type): - if norm_type == 'gn': - for i in range(num_convs): - layer_name = "mask_inter_feat_" + str(i + 1) - fan = self.conv_dim * 3 * 3 - initializer = MSRA(uniform=False, fan_in=fan) - roi_feat = ConvNorm( - roi_feat, - self.conv_dim, - 3, - act='relu', - dilation=self.dilation, - initializer=initializer, - norm_type=self.norm_type, - name=layer_name, - norm_name=layer_name) - else: - for i in range(num_convs): - layer_name = "mask_inter_feat_" + str(i + 1) - fan = self.conv_dim * 3 * 3 - initializer = MSRA(uniform=False, fan_in=fan) - roi_feat = fluid.layers.conv2d( - input=roi_feat, - num_filters=self.conv_dim, - filter_size=3, - padding=1 * self.dilation, - act='relu', - stride=1, - dilation=self.dilation, - name=layer_name, - param_attr=ParamAttr( - name=layer_name + '_w', initializer=initializer), - bias_attr=ParamAttr( - name=layer_name + '_b', - 
learning_rate=2., - regularizer=L2Decay(0.))) - fan = roi_feat.shape[1] * 2 * 2 - feat = fluid.layers.conv2d_transpose( - input=roi_feat, - num_filters=self.conv_dim, - filter_size=2, - stride=2, - act='relu', - param_attr=ParamAttr( - name='conv5_mask_w', - initializer=MSRA( - uniform=False, fan_in=fan)), - bias_attr=ParamAttr( - name='conv5_mask_b', learning_rate=2., regularizer=L2Decay(0.))) - return feat - - def _get_output(self, roi_feat): - class_num = self.num_classes - # configure the conv number for FPN if necessary - head_feat = self._mask_conv_head(roi_feat, self.num_convs, - self.norm_type) - fan = class_num - mask_logits = fluid.layers.conv2d( - input=head_feat, - num_filters=class_num, - filter_size=1, - act=None, - param_attr=ParamAttr( - name='mask_fcn_logits_w', - initializer=MSRA( - uniform=False, fan_in=fan)), - bias_attr=ParamAttr( - name="mask_fcn_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return mask_logits - - def get_loss(self, roi_feat, mask_int32): - mask_logits = self._get_output(roi_feat) - num_classes = self.num_classes - resolution = self.resolution - dim = num_classes * resolution * resolution - mask_logits = fluid.layers.reshape(mask_logits, (-1, dim)) - - mask_label = fluid.layers.cast(x=mask_int32, dtype='float32') - mask_label.stop_gradient = True - loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits( - x=mask_logits, label=mask_label, ignore_index=-1, normalize=True) - loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask') - return {'loss_mask': loss_mask} - - def get_prediction(self, roi_feat, bbox_pred): - """ - Get prediction mask in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - bbox_pred (Variable): predicted bbox. - - Returns: - mask_pred (Variable): Prediction mask with shape - [N, num_classes, resolution, resolution]. 
- """ - mask_logits = self._get_output(roi_feat) - mask_prob = fluid.layers.sigmoid(mask_logits) - mask_prob = fluid.layers.lod_reset(mask_prob, bbox_pred) - return mask_prob diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/target_assigners.py b/PaddleCV/PaddleDetection/ppdet/modeling/target_assigners.py deleted file mode 100644 index 57d7bd738d2ee730effa3406df04fddedf1e5cd8..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/target_assigners.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid - -from ppdet.core.workspace import register -from ppdet.modeling.ops import BBoxAssigner, MaskAssigner - -__all__ = ['BBoxAssigner', 'MaskAssigner', 'CascadeBBoxAssigner'] - - -@register -class CascadeBBoxAssigner(object): - __shared__ = ['num_classes'] - - def __init__(self, - batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=[0.5, 0.6, 0.7], - bg_thresh_hi=[0.5, 0.6, 0.7], - bg_thresh_lo=[0., 0., 0.], - bbox_reg_weights=[10, 20, 30], - num_classes=81, - shuffle_before_sample=True): - super(CascadeBBoxAssigner, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.class_nums = num_classes - self.use_random = shuffle_before_sample - - def __call__(self, input_rois, feed_vars, curr_stage): - - curr_bbox_reg_w = [ - 1. / self.bbox_reg_weights[curr_stage], - 1. / self.bbox_reg_weights[curr_stage], - 2. / self.bbox_reg_weights[curr_stage], - 2. 
/ self.bbox_reg_weights[curr_stage], - ] - outs = fluid.layers.generate_proposal_labels( - rpn_rois=input_rois, - gt_classes=feed_vars['gt_label'], - is_crowd=feed_vars['is_crowd'], - gt_boxes=feed_vars['gt_box'], - im_info=feed_vars['im_info'], - batch_size_per_im=self.batch_size_per_im, - fg_thresh=self.fg_thresh[curr_stage], - bg_thresh_hi=self.bg_thresh_hi[curr_stage], - bg_thresh_lo=self.bg_thresh_lo[curr_stage], - bbox_reg_weights=curr_bbox_reg_w, - use_random=self.use_random, - class_nums=2, - is_cls_agnostic=True, - is_cascade_rcnn=True if curr_stage > 0 else False) - return outs diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/tests/__init__.py b/PaddleCV/PaddleDetection/ppdet/modeling/tests/__init__.py deleted file mode 100644 index 33ed0ecf10ec4cad807ebb6df1590de65eeeab1e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/tests/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/tests/decorator_helper.py b/PaddleCV/PaddleDetection/ppdet/modeling/tests/decorator_helper.py deleted file mode 100644 index 894833ce15eab82ea06c2e66a8e53cb2e7e057b5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/tests/decorator_helper.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as fluid - -__all__ = ['prog_scope'] - - -def prog_scope(): - def __impl__(fn): - def __fn__(*args, **kwargs): - prog = fluid.Program() - startup_prog = fluid.Program() - scope = fluid.core.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(prog, startup_prog): - with fluid.unique_name.guard(): - fn(*args, **kwargs) - - return __fn__ - - return __impl__ diff --git a/PaddleCV/PaddleDetection/ppdet/modeling/tests/test_architectures.py b/PaddleCV/PaddleDetection/ppdet/modeling/tests/test_architectures.py deleted file mode 100644 index 7df9580666858a834506a9a0beac742e548266f5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/modeling/tests/test_architectures.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import unittest -import numpy as np - -import paddle.fluid as fluid - -from ppdet.modeling.tests.decorator_helper import prog_scope -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.modeling.model_input import create_feed - - -class TestFasterRCNN(unittest.TestCase): - def setUp(self): - self.set_config() - self.cfg = load_config(self.cfg_file) - self.detector_type = self.cfg['architecture'] - - def set_config(self): - self.cfg_file = 'configs/faster_rcnn_r50_1x.yml' - - @prog_scope() - def test_train(self): - train_feed = create(self.cfg['train_feed']) - model = create(self.detector_type) - _, feed_vars = create_feed(train_feed) - train_fetches = model.train(feed_vars) - - @prog_scope() - def test_test(self): - test_feed = create(self.cfg['eval_feed']) - model = create(self.detector_type) - _, feed_vars = create_feed(test_feed) - test_fetches = model.eval(feed_vars) - - -class TestMaskRCNN(TestFasterRCNN): - def set_config(self): - self.cfg_file = 'configs/mask_rcnn_r50_1x.yml' - - -class TestCascadeRCNN(TestFasterRCNN): - def set_config(self): - self.cfg_file = 'configs/cascade_rcnn_r50_fpn_1x.yml' - - -class TestYolov3(TestFasterRCNN): - def set_config(self): - self.cfg_file = 'configs/yolov3_darknet.yml' - - -class TestRetinaNet(TestFasterRCNN): - def set_config(self): - self.cfg_file = 'configs/retinanet_r50_fpn_1x.yml' - - -class TestSSD(TestFasterRCNN): - def set_config(self): - self.cfg_file = 'configs/ssd/ssd_mobilenet_v1_voc.yml' - - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/PaddleDetection/ppdet/optimizer.py b/PaddleCV/PaddleDetection/ppdet/optimizer.py deleted file mode 100644 index e695aad043b8778128f1b6870a27d2b0b3fe6adc..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/optimizer.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle 
Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging - -from paddle import fluid - -import paddle.fluid.optimizer as optimizer -import paddle.fluid.regularizer as regularizer - -from ppdet.core.workspace import register, serializable - -__all__ = ['LearningRate', 'OptimizerBuilder'] - -logger = logging.getLogger(__name__) - - -@serializable -class PiecewiseDecay(object): - """ - Multi step learning rate decay - - Args: - gamma (float): decay factor - milestones (list): steps at which to decay learning rate - """ - - def __init__(self, gamma=0.1, milestones=[60000, 80000], values=None): - super(PiecewiseDecay, self).__init__() - self.gamma = gamma - self.milestones = milestones - self.values = values - - def __call__(self, base_lr=None, learning_rate=None): - if self.values is not None: - return fluid.layers.piecewise_decay(self.milestones, self.values) - assert base_lr is not None, "either base LR or values should be provided" - values = [base_lr] - lr = base_lr - for _ in self.milestones: - lr *= self.gamma - values.append(lr) - return fluid.layers.piecewise_decay(self.milestones, values) - - -@serializable -class LinearWarmup(object): - """ - Warm up learning rate linearly - - Args: - steps (int): warm up steps - start_factor (float): initial learning rate factor - """ - - def __init__(self, steps=500, 
start_factor=1. / 3): - super(LinearWarmup, self).__init__() - self.steps = steps - self.start_factor = start_factor - - def __call__(self, base_lr, learning_rate): - start_lr = base_lr * self.start_factor - - return fluid.layers.linear_lr_warmup( - learning_rate=learning_rate, - warmup_steps=self.steps, - start_lr=start_lr, - end_lr=base_lr) - - -@register -class LearningRate(object): - """ - Learning Rate configuration - - Args: - base_lr (float): base learning rate - schedulers (list): learning rate schedulers - """ - __category__ = 'optim' - - def __init__(self, - base_lr=0.01, - schedulers=[PiecewiseDecay(), LinearWarmup()]): - super(LearningRate, self).__init__() - self.base_lr = base_lr - self.schedulers = schedulers - - def __call__(self): - lr = None - for sched in self.schedulers: - lr = sched(self.base_lr, lr) - return lr - - -@register -class OptimizerBuilder(): - """ - Build optimizer handles - - Args: - regularizer (object): an `Regularizer` instance - optimizer (object): an `Optimizer` instance - """ - __category__ = 'optim' - - def __init__(self, - regularizer={'type': 'L2', - 'factor': .0001}, - optimizer={'type': 'Momentum', - 'momentum': .9}): - self.regularizer = regularizer - self.optimizer = optimizer - - def __call__(self, learning_rate): - reg_type = self.regularizer['type'] + 'Decay' - reg_factor = self.regularizer['factor'] - regularization = getattr(regularizer, reg_type)(reg_factor) - - optim_args = self.optimizer.copy() - optim_type = optim_args['type'] - del optim_args['type'] - op = getattr(optimizer, optim_type) - return op(learning_rate=learning_rate, - regularization=regularization, - **optim_args) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/__init__.py b/PaddleCV/PaddleDetection/ppdet/utils/__init__.py deleted file mode 100644 index d0c32e26092f6ea25771279418582a24ea449ab2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 
PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/PaddleCV/PaddleDetection/ppdet/utils/check.py b/PaddleCV/PaddleDetection/ppdet/utils/check.py deleted file mode 100644 index 9e816eaadcbf21b48e80ab6e607bff97269e7c39..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/check.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys - -import paddle.fluid as fluid - -import logging -logger = logging.getLogger(__name__) - -__all__ = ['check_gpu'] - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. 
- """ - err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set use_gpu as false in config file to run " \ - "model on CPU" - - try: - if use_gpu and not fluid.is_compiled_with_cuda(): - logger.error(err) - sys.exit(1) - except Exception as e: - pass - diff --git a/PaddleCV/PaddleDetection/ppdet/utils/checkpoint.py b/PaddleCV/PaddleDetection/ppdet/utils/checkpoint.py deleted file mode 100644 index 54c364812b041280148a1d0b85543209dc449724..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/checkpoint.py +++ /dev/null @@ -1,291 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import errno -import os -import shutil -import time -import numpy as np -import re -import paddle.fluid as fluid - -from .download import get_weights_path - -import logging -logger = logging.getLogger(__name__) - -__all__ = [ - 'load_checkpoint', - 'load_and_fusebn', - 'load_params', - 'save', -] - - -def is_url(path): - """ - Whether path is URL. - Args: - path (string): URL string or not. 
- """ - return path.startswith('http://') or path.startswith('https://') - - -def _get_weight_path(path): - env = os.environ - if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env: - trainer_id = int(env['PADDLE_TRAINER_ID']) - num_trainers = int(env['PADDLE_TRAINERS_NUM']) - if num_trainers <= 1: - path = get_weights_path(path) - else: - from ppdet.utils.download import map_path, WEIGHTS_HOME - weight_path = map_path(path, WEIGHTS_HOME) - lock_path = weight_path + '.lock' - if not os.path.exists(weight_path): - try: - os.makedirs(os.path.dirname(weight_path)) - except OSError as e: - if e.errno != errno.EEXIST: - raise - with open(lock_path, 'w'): # touch - os.utime(lock_path, None) - if trainer_id == 0: - get_weights_path(path) - os.remove(lock_path) - else: - while os.path.exists(lock_path): - time.sleep(1) - path = weight_path - else: - path = get_weights_path(path) - return path - - -def load_params(exe, prog, path, ignore_params=[]): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): load weight to which Program object. - path (string): URL string or loca model path. - ignore_params (bool): ignore variable to load when finetuning. 
- It can be specified by finetune_exclude_pretrained_params - and the usage can refer to docs/TRANSFER_LEARNING.md - """ - - if is_url(path): - path = _get_weight_path(path) - - if not os.path.exists(path): - raise ValueError("Model pretrain path {} does not " - "exists.".format(path)) - - logger.info('Loading parameters from {}...'.format(path)) - - def _if_exist(var): - do_ignore = False - param_exist = os.path.exists(os.path.join(path, var.name)) - if len(ignore_params) > 0: - # Parameter related to num_classes will be ignored in finetuning - do_ignore_list = [ - bool(re.match(name, var.name)) for name in ignore_params - ] - do_ignore = any(do_ignore_list) - if do_ignore and param_exist: - logger.info('In load_params, ignore {}'.format(var.name)) - do_load = param_exist and not do_ignore - if do_load: - logger.debug('load weight {}'.format(var.name)) - return do_load - - fluid.io.load_vars(exe, path, prog, predicate=_if_exist) - - -def load_checkpoint(exe, prog, path): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): load weight to which Program object. - path (string): URL string or loca model path. - """ - if is_url(path): - path = _get_weight_path(path) - - if not os.path.exists(path): - raise ValueError("Model checkpoint path {} does not " - "exists.".format(path)) - - logger.info('Loading checkpoint from {}...'.format(path)) - fluid.io.load_persistables(exe, path, prog) - - -def global_step(scope=None): - """ - Load global step in scope. - Args: - scope (fluid.Scope): load global step from which scope. If None, - from default global_scope(). - - Returns: - global step: int. - """ - if scope is None: - scope = fluid.global_scope() - v = scope.find_var('@LR_DECAY_COUNTER@') - step = np.array(v.get_tensor())[0] if v else 0 - return step - - -def save(exe, prog, path): - """ - Load model from the given path. - Args: - exe (fluid.Executor): The fluid.Executor object. 
- prog (fluid.Program): save weight from which Program object. - path (string): the path to save model. - """ - if os.path.isdir(path): - shutil.rmtree(path) - logger.info('Save model to {}.'.format(path)) - fluid.io.save_persistables(exe, path, prog) - - -def load_and_fusebn(exe, prog, path): - """ - Fuse params of batch norm to scale and bias. - - Args: - exe (fluid.Executor): The fluid.Executor object. - prog (fluid.Program): save weight from which Program object. - path (string): the path to save model. - """ - logger.info('Load model and fuse batch norm if have from {}...'.format( - path)) - - if is_url(path): - path = _get_weight_path(path) - - if not os.path.exists(path): - raise ValueError("Model path {} does not exists.".format(path)) - - def _if_exist(var): - b = os.path.exists(os.path.join(path, var.name)) - - if b: - logger.debug('load weight {}'.format(var.name)) - return b - - all_vars = list(filter(_if_exist, prog.list_vars())) - - # Since the program uses affine-channel, there is no running mean and var - # in the program, here append running mean and var. 
- # NOTE, the params of batch norm should be like: - # x_scale - # x_offset - # x_mean - # x_variance - # x is any prefix - mean_variances = set() - bn_vars = [] - - bn_in_path = True - - inner_prog = fluid.Program() - inner_start_prog = fluid.Program() - inner_block = inner_prog.global_block() - with fluid.program_guard(inner_prog, inner_start_prog): - for block in prog.blocks: - ops = list(block.ops) - if not bn_in_path: - break - for op in ops: - if op.type == 'affine_channel': - # remove 'scale' as prefix - scale_name = op.input('Scale')[0] # _scale - bias_name = op.input('Bias')[0] # _offset - prefix = scale_name[:-5] - mean_name = prefix + 'mean' - variance_name = prefix + 'variance' - - if not os.path.exists(os.path.join(path, mean_name)): - bn_in_path = False - break - if not os.path.exists(os.path.join(path, variance_name)): - bn_in_path = False - break - - bias = block.var(bias_name) - - mean_vb = inner_block.create_var( - name=mean_name, - type=bias.type, - shape=bias.shape, - dtype=bias.dtype, - persistable=True) - variance_vb = inner_block.create_var( - name=variance_name, - type=bias.type, - shape=bias.shape, - dtype=bias.dtype, - persistable=True) - - mean_variances.add(mean_vb) - mean_variances.add(variance_vb) - - bn_vars.append( - [scale_name, bias_name, mean_name, variance_name]) - - if not bn_in_path: - fluid.io.load_vars(exe, path, prog, vars=all_vars) - logger.warning( - "There is no paramters of batch norm in model {}. " - "Skip to fuse batch norm. And load paramters done.".format(path)) - return - - # load running mean and running variance on cpu place into global scope. - place = fluid.CPUPlace() - exe_cpu = fluid.Executor(place) - fluid.io.load_vars(exe_cpu, path, vars=[v for v in mean_variances]) - - # load params on real place into global scope. 
- fluid.io.load_vars(exe, path, prog, vars=all_vars) - - eps = 1e-5 - for names in bn_vars: - scale_name, bias_name, mean_name, var_name = names - - scale = fluid.global_scope().find_var(scale_name).get_tensor() - bias = fluid.global_scope().find_var(bias_name).get_tensor() - mean = fluid.global_scope().find_var(mean_name).get_tensor() - var = fluid.global_scope().find_var(var_name).get_tensor() - - scale_arr = np.array(scale) - bias_arr = np.array(bias) - mean_arr = np.array(mean) - var_arr = np.array(var) - - bn_std = np.sqrt(np.add(var_arr, eps)) - new_scale = np.float32(np.divide(scale_arr, bn_std)) - new_bias = bias_arr - mean_arr * new_scale - - # fuse to scale and bias in affine_channel - scale.set(new_scale, exe.place) - bias.set(new_bias, exe.place) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/cli.py b/PaddleCV/PaddleDetection/ppdet/utils/cli.py deleted file mode 100644 index b8ba59d78f1ddf606012fd0cf6d71a71d79eea05..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/cli.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from argparse import ArgumentParser, RawDescriptionHelpFormatter - -import yaml -import re -from ppdet.core.workspace import get_registered_modules, dump_value - -__all__ = ['ColorTTY', 'ArgsParser'] - - -class ColorTTY(object): - def __init__(self): - super(ColorTTY, self).__init__() - self.colors = ['red', 'green', 'yellow', 'blue', 'magenta', 'cyan'] - - def __getattr__(self, attr): - if attr in self.colors: - color = self.colors.index(attr) + 31 - - def color_message(message): - return "[{}m{}".format(color, message) - - setattr(self, attr, color_message) - return color_message - - def bold(self, message): - return self.with_code('01', message) - - def with_code(self, code, message): - return "[{}m{}".format(code, message) - - -class ArgsParser(ArgumentParser): - def __init__(self): - super(ArgsParser, self).__init__( - formatter_class=RawDescriptionHelpFormatter) - self.add_argument("-c", "--config", help="configuration file to use") - self.add_argument( - "-o", "--opt", nargs='*', help="set configuration options") - - def parse_args(self, argv=None): - args = super(ArgsParser, self).parse_args(argv) - assert args.config is not None, \ - "Please specify --config=configure_file_path." - args.opt = self._parse_opt(args.opt) - return args - - def _parse_opt(self, opts): - config = {} - if not opts: - return config - for s in opts: - s = s.strip() - k, v = s.split('=', 1) - if '.' 
not in k: - config[k] = yaml.load(v, Loader=yaml.Loader) - else: - keys = k.split('.') - if keys[0] not in config: - config[keys[0]] = {} - cur = config[keys[0]] - for idx, key in enumerate(keys[1:]): - if idx == len(keys) - 2: - cur[key] = yaml.load(v, Loader=yaml.Loader) - else: - cur[key] = {} - cur = cur[key] - return config - - -def print_total_cfg(config): - modules = get_registered_modules() - color_tty = ColorTTY() - green = '___{}___'.format(color_tty.colors.index('green') + 31) - - styled = {} - for key in config.keys(): - if not config[key]: # empty schema - continue - - if key not in modules and not hasattr(config[key], '__dict__'): - styled[key] = config[key] - continue - elif key in modules: - module = modules[key] - else: - type_name = type(config[key]).__name__ - if type_name in modules: - module = modules[type_name].copy() - module.update({ - k: v - for k, v in config[key].__dict__.items() - if k in module.schema - }) - key += " ({})".format(type_name) - default = module.find_default_keys() - missing = module.find_missing_keys() - mismatch = module.find_mismatch_keys() - extra = module.find_extra_keys() - dep_missing = [] - for dep in module.inject: - if isinstance(module[dep], str) and module[dep] != '': - if module[dep] not in modules: # not a valid module - dep_missing.append(dep) - else: - dep_mod = modules[module[dep]] - # empty dict but mandatory - if not dep_mod and dep_mod.mandatory(): - dep_missing.append(dep) - override = list( - set(module.keys()) - set(default) - set(extra) - set(dep_missing)) - replacement = {} - for name in set(override + default + extra + mismatch + missing): - new_name = name - if name in missing: - value = "" - else: - value = module[name] - - if name in extra: - value = dump_value(value) + " " - elif name in mismatch: - value = dump_value(value) + " " - elif name in dep_missing: - value = dump_value(value) + " " - elif name in override and value != '': - mark = green - new_name = mark + name - 
replacement[new_name] = value - styled[key] = replacement - buffer = yaml.dump(styled, default_flow_style=False, default_style='') - buffer = (re.sub(r"", r"[31m[0m", buffer)) - buffer = (re.sub(r"", r"[33m[0m", buffer)) - buffer = (re.sub(r"", r"[31m[0m", buffer)) - buffer = (re.sub(r"", - r"[31m[0m", buffer)) - buffer = re.sub(r"___(\d+)___(.*?):", r"[\1m\2[0m:", buffer) - print(buffer) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/coco_eval.py b/PaddleCV/PaddleDetection/ppdet/utils/coco_eval.py deleted file mode 100644 index cb5df475fb16f10fffa18a02920687780a3fe298..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/coco_eval.py +++ /dev/null @@ -1,560 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import sys -import json -import cv2 -import numpy as np -import matplotlib -matplotlib.use('Agg') - -import logging -logger = logging.getLogger(__name__) - -__all__ = [ - 'bbox_eval', - 'mask_eval', - 'bbox2out', - 'mask2out', - 'get_category_info', - 'proposal_eval', - 'cocoapi_eval', -] - - -def clip_bbox(bbox): - xmin = max(min(bbox[0], 1.), 0.) - ymin = max(min(bbox[1], 1.), 0.) - xmax = max(min(bbox[2], 1.), 0.) - ymax = max(min(bbox[3], 1.), 0.) 
- return xmin, ymin, xmax, ymax - - -def proposal_eval(results, anno_file, outfile, max_dets=(100, 300, 1000)): - assert 'proposal' in results[0] - assert outfile.endswith('.json') - - xywh_results = proposal2out(results) - assert len( - xywh_results) > 0, "The number of valid proposal detected is zero.\n \ - Please use reasonable model and check input data." - - with open(outfile, 'w') as f: - json.dump(xywh_results, f) - - cocoapi_eval(outfile, 'proposal', anno_file=anno_file, max_dets=max_dets) - # flush coco evaluation result - sys.stdout.flush() - - -def bbox_eval(results, - anno_file, - outfile, - with_background=True, - is_bbox_normalized=False): - assert 'bbox' in results[0] - assert outfile.endswith('.json') - from pycocotools.coco import COCO - - coco_gt = COCO(anno_file) - cat_ids = coco_gt.getCatIds() - - # when with_background = True, mapping category to classid, like: - # background:0, first_class:1, second_class:2, ... - clsid2catid = dict( - {i + int(with_background): catid - for i, catid in enumerate(cat_ids)}) - - xywh_results = bbox2out( - results, clsid2catid, is_bbox_normalized=is_bbox_normalized) - - if len(xywh_results) == 0: - logger.warning("The number of valid bbox detected is zero.\n \ - Please use reasonable model and check input data.\n \ - stop eval!") - return [0.0] - with open(outfile, 'w') as f: - json.dump(xywh_results, f) - - map_stats = cocoapi_eval(outfile, 'bbox', coco_gt=coco_gt) - # flush coco evaluation result - sys.stdout.flush() - return map_stats - - -def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5): - assert 'mask' in results[0] - assert outfile.endswith('.json') - from pycocotools.coco import COCO - - coco_gt = COCO(anno_file) - clsid2catid = {i + 1: v for i, v in enumerate(coco_gt.getCatIds())} - - segm_results = mask2out(results, clsid2catid, resolution, thresh_binarize) - if len(segm_results) == 0: - logger.warning("The number of valid mask detected is zero.\n \ - Please use reasonable 
model and check input data.") - return - - with open(outfile, 'w') as f: - json.dump(segm_results, f) - - cocoapi_eval(outfile, 'segm', coco_gt=coco_gt) - - -def cocoapi_eval(jsonfile, - style, - coco_gt=None, - anno_file=None, - max_dets=(100, 300, 1000)): - """ - Args: - jsonfile: Evaluation json file, eg: bbox.json, mask.json. - style: COCOeval style, can be `bbox` , `segm` and `proposal`. - coco_gt: Whether to load COCOAPI through anno_file, - eg: coco_gt = COCO(anno_file) - anno_file: COCO annotations file. - max_dets: COCO evaluation maxDets. - """ - assert coco_gt != None or anno_file != None - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - - if coco_gt == None: - coco_gt = COCO(anno_file) - logger.info("Start evaluate...") - coco_dt = coco_gt.loadRes(jsonfile) - if style == 'proposal': - coco_eval = COCOeval(coco_gt, coco_dt, 'bbox') - coco_eval.params.useCats = 0 - coco_eval.params.maxDets = list(max_dets) - else: - coco_eval = COCOeval(coco_gt, coco_dt, style) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - return coco_eval.stats - - -def proposal2out(results, is_bbox_normalized=False): - xywh_res = [] - for t in results: - bboxes = t['proposal'][0] - lengths = t['proposal'][1][0] - im_ids = np.array(t['im_id'][0]) - if bboxes.shape == (1, 1) or bboxes is None: - continue - - k = 0 - for i in range(len(lengths)): - num = lengths[i] - im_id = int(im_ids[i][0]) - for j in range(num): - dt = bboxes[k] - xmin, ymin, xmax, ymax = dt.tolist() - - if is_bbox_normalized: - xmin, ymin, xmax, ymax = \ - clip_bbox([xmin, ymin, xmax, ymax]) - w = xmax - xmin - h = ymax - ymin - else: - w = xmax - xmin + 1 - h = ymax - ymin + 1 - - bbox = [xmin, ymin, w, h] - coco_res = { - 'image_id': im_id, - 'category_id': 1, - 'bbox': bbox, - 'score': 1.0 - } - xywh_res.append(coco_res) - k += 1 - return xywh_res - - -def bbox2out(results, clsid2catid, is_bbox_normalized=False): - """ - Args: - results: request a dict, 
should include: `bbox`, `im_id`, - if is_bbox_normalized=True, also need `im_shape`. - clsid2catid: class id to category id map of COCO2017 dataset. - is_bbox_normalized: whether or not bbox is normalized. - """ - xywh_res = [] - for t in results: - bboxes = t['bbox'][0] - lengths = t['bbox'][1][0] - im_ids = np.array(t['im_id'][0]) - if bboxes.shape == (1, 1) or bboxes is None: - continue - - k = 0 - for i in range(len(lengths)): - num = lengths[i] - im_id = int(im_ids[i][0]) - for j in range(num): - dt = bboxes[k] - clsid, score, xmin, ymin, xmax, ymax = dt.tolist() - catid = (clsid2catid[int(clsid)]) - - if is_bbox_normalized: - xmin, ymin, xmax, ymax = \ - clip_bbox([xmin, ymin, xmax, ymax]) - w = xmax - xmin - h = ymax - ymin - im_height, im_width = t['im_shape'][0][i].tolist() - xmin *= im_width - ymin *= im_height - w *= im_width - h *= im_height - else: - w = xmax - xmin + 1 - h = ymax - ymin + 1 - - bbox = [xmin, ymin, w, h] - coco_res = { - 'image_id': im_id, - 'category_id': catid, - 'bbox': bbox, - 'score': score - } - xywh_res.append(coco_res) - k += 1 - return xywh_res - - -def mask2out(results, clsid2catid, resolution, thresh_binarize=0.5): - import pycocotools.mask as mask_util - scale = (resolution + 2.0) / resolution - - segm_res = [] - - # for each batch - for t in results: - bboxes = t['bbox'][0] - - lengths = t['bbox'][1][0] - im_ids = np.array(t['im_id'][0]) - if bboxes.shape == (1, 1) or bboxes is None: - continue - if len(bboxes.tolist()) == 0: - continue - - masks = t['mask'][0] - - s = 0 - # for each sample - for i in range(len(lengths)): - num = lengths[i] - im_id = int(im_ids[i][0]) - im_shape = t['im_shape'][0][i] - - bbox = bboxes[s:s + num][:, 2:] - clsid_scores = bboxes[s:s + num][:, 0:2] - mask = masks[s:s + num] - s += num - - im_h = int(im_shape[0]) - im_w = int(im_shape[1]) - - expand_bbox = expand_boxes(bbox, scale) - expand_bbox = expand_bbox.astype(np.int32) - - padded_mask = np.zeros( - (resolution + 2, resolution + 2), 
dtype=np.float32) - - for j in range(num): - xmin, ymin, xmax, ymax = expand_bbox[j].tolist() - clsid, score = clsid_scores[j].tolist() - clsid = int(clsid) - padded_mask[1:-1, 1:-1] = mask[j, clsid, :, :] - - catid = clsid2catid[clsid] - - w = xmax - xmin + 1 - h = ymax - ymin + 1 - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - resized_mask = cv2.resize(padded_mask, (w, h)) - resized_mask = np.array( - resized_mask > thresh_binarize, dtype=np.uint8) - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - - x0 = min(max(xmin, 0), im_w) - x1 = min(max(xmax + 1, 0), im_w) - y0 = min(max(ymin, 0), im_h) - y1 = min(max(ymax + 1, 0), im_h) - - im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), ( - x0 - xmin):(x1 - xmin)] - segm = mask_util.encode( - np.array( - im_mask[:, :, np.newaxis], order='F'))[0] - catid = clsid2catid[clsid] - segm['counts'] = segm['counts'].decode('utf8') - coco_res = { - 'image_id': im_id, - 'category_id': catid, - 'segmentation': segm, - 'score': score - } - segm_res.append(coco_res) - return segm_res - - -def expand_boxes(boxes, scale): - """ - Expand an array of boxes by a given scale. 
- """ - w_half = (boxes[:, 2] - boxes[:, 0]) * .5 - h_half = (boxes[:, 3] - boxes[:, 1]) * .5 - x_c = (boxes[:, 2] + boxes[:, 0]) * .5 - y_c = (boxes[:, 3] + boxes[:, 1]) * .5 - - w_half *= scale - h_half *= scale - - boxes_exp = np.zeros(boxes.shape) - boxes_exp[:, 0] = x_c - w_half - boxes_exp[:, 2] = x_c + w_half - boxes_exp[:, 1] = y_c - h_half - boxes_exp[:, 3] = y_c + h_half - - return boxes_exp - - -def get_category_info(anno_file=None, - with_background=True, - use_default_label=False): - if use_default_label or anno_file is None \ - or not os.path.exists(anno_file): - logger.info("Not found annotation file {}, load " - "coco17 categories.".format(anno_file)) - return coco17_category_info(with_background) - else: - logger.info("Load categories from {}".format(anno_file)) - return get_category_info_from_anno(anno_file, with_background) - - -def get_category_info_from_anno(anno_file, with_background=True): - """ - Get class id to category id map and category id - to category name map from annotation file. - - Args: - anno_file (str): annotation file path - with_background (bool, default True): - whether load background as class 0. - """ - from pycocotools.coco import COCO - coco = COCO(anno_file) - cats = coco.loadCats(coco.getCatIds()) - clsid2catid = { - i + int(with_background): cat['id'] - for i, cat in enumerate(cats) - } - catid2name = {cat['id']: cat['name'] for cat in cats} - - return clsid2catid, catid2name - - -def coco17_category_info(with_background=True): - """ - Get class id to category id map and category id - to category name map of COCO2017 dataset - - Args: - with_background (bool, default True): - whether load background as class 0. 
- """ - clsid2catid = { - 1: 1, - 2: 2, - 3: 3, - 4: 4, - 5: 5, - 6: 6, - 7: 7, - 8: 8, - 9: 9, - 10: 10, - 11: 11, - 12: 13, - 13: 14, - 14: 15, - 15: 16, - 16: 17, - 17: 18, - 18: 19, - 19: 20, - 20: 21, - 21: 22, - 22: 23, - 23: 24, - 24: 25, - 25: 27, - 26: 28, - 27: 31, - 28: 32, - 29: 33, - 30: 34, - 31: 35, - 32: 36, - 33: 37, - 34: 38, - 35: 39, - 36: 40, - 37: 41, - 38: 42, - 39: 43, - 40: 44, - 41: 46, - 42: 47, - 43: 48, - 44: 49, - 45: 50, - 46: 51, - 47: 52, - 48: 53, - 49: 54, - 50: 55, - 51: 56, - 52: 57, - 53: 58, - 54: 59, - 55: 60, - 56: 61, - 57: 62, - 58: 63, - 59: 64, - 60: 65, - 61: 67, - 62: 70, - 63: 72, - 64: 73, - 65: 74, - 66: 75, - 67: 76, - 68: 77, - 69: 78, - 70: 79, - 71: 80, - 72: 81, - 73: 82, - 74: 84, - 75: 85, - 76: 86, - 77: 87, - 78: 88, - 79: 89, - 80: 90 - } - - catid2name = { - 0: 'background', - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 
'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' - } - - if not with_background: - clsid2catid = {k - 1: v for k, v in clsid2catid.items()} - - return clsid2catid, catid2name diff --git a/PaddleCV/PaddleDetection/ppdet/utils/colormap.py b/PaddleCV/PaddleDetection/ppdet/utils/colormap.py deleted file mode 100644 index 566185ef90390e2f45747bee48e3b008f7dfc0e9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/colormap.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np - - -def colormap(rgb=False): - """ - Get colormap - """ - color_list = np.array([ - 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, - 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, - 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, - 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, - 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, - 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, - 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, - 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, - 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, - 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, - 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, - 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, - 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, - 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, - 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, - 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 - ]).astype(np.float32) - 
color_list = color_list.reshape((-1, 3)) * 255 - if not rgb: - color_list = color_list[:, ::-1] - return color_list diff --git a/PaddleCV/PaddleDetection/ppdet/utils/dist_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/dist_utils.py deleted file mode 100644 index 32eead4a797ba70cb6980e0368ff9873102680c2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/dist_utils.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import - -import os - -import paddle.fluid as fluid - - -def nccl2_prepare(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - - -def prepare_for_multi_process(exe, build_strategy, startup_prog, main_prog): - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: - return - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - nccl2_prepare(trainer_id, startup_prog, main_prog) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/download.py b/PaddleCV/PaddleDetection/ppdet/utils/download.py deleted file mode 100644 index 772e19044c320bc0fb7ba5e75216951915074cf5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/download.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import os.path as osp -import shutil -import requests -import tqdm -import hashlib -import tarfile -import zipfile - -from .voc_utils import create_list - -import logging -logger = logging.getLogger(__name__) - -__all__ = ['get_weights_path', 'get_dataset_path'] - -WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/weights") -DATASET_HOME = osp.expanduser("~/.cache/paddle/dataset") - -# dict of {dataset_name: (download_info, sub_dirs)} -# download info: (url, md5sum) -DATASETS = { - 'coco': ([ - ( - 'http://images.cocodataset.org/zips/train2017.zip', - 'cced6f7f71b7629ddf16f17bbcfab6b2', ), - ( - 'http://images.cocodataset.org/zips/val2017.zip', - '442b8da7639aecaf257c1dceb8ba8c80', ), - ( - 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip', - 'f4bbac642086de4f52a3fdda2de5fa2c', ), - ], ["annotations", "train2017", "val2017"]), - 'voc': ([ - ( - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', - '6cd6e144f989b92b3379bac3b3de84fd', ), - ( - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', - 'c52e279531787c972589f7e41ab4ae64', ), - ( - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', - 'b6e924de25625d8de591ea690078ad9f', ), - ], ["VOCdevkit/VOC2012", "VOCdevkit/VOC2007"]), - 'wider_face': ([ - ( - 'https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip', - '3fedf70df600953d25982bcd13d91ba2', ), - ( - 'https://dataset.bj.bcebos.com/wider_face/WIDER_val.zip', - 'dfa7d7e790efa35df3788964cf0bbaea', ), - ( - 'https://dataset.bj.bcebos.com/wider_face/wider_face_split.zip', - 'a4a898d6193db4b9ef3260a68bad0dc7', ), - ], ["WIDER_train", "WIDER_val", "wider_face_split"]), - 'fruit': ([( - 'https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar', - 'ee4a1bf2e321b75b0850cc6e063f79d7', ), ], ["fruit-detection"]), - 'objects365': (), -} 
- -DOWNLOAD_RETRY_LIMIT = 3 - - -def get_weights_path(url): - """Get weights path from WEIGHT_HOME, if not exists, - download it from url. - """ - path, _ = get_path(url, WEIGHTS_HOME) - return path - - -def get_dataset_path(path, annotation, image_dir): - """ - If path exists, return path. - Otherwise, get dataset path from DATASET_HOME, if not exists, - download it. - """ - if _dataset_exists(path, annotation, image_dir): - return path - - logger.info("Dataset {} is not valid for reason above, try searching {} or " - "downloading dataset...".format( - osp.realpath(path), DATASET_HOME)) - - for name, dataset in DATASETS.items(): - if os.path.split(path.strip().lower())[-1] == name: - logger.info("Parse dataset_dir {} as dataset " - "{}".format(path, name)) - if name == 'objects365': - raise NotImplementedError( - "Dataset {} is not valid for download automatically." - "Please apply and download the dataset from." - "https://www.objects365.org/download.html") - data_dir = osp.join(DATASET_HOME, name) - - # For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007 - if name == 'voc': - exists = True - for sub_dir in dataset[1]: - check_dir = osp.join(data_dir, sub_dir) - if osp.exists(check_dir): - logger.info("Found {}".format(check_dir)) - else: - exists = False - if exists: - return data_dir - - # voc exist is checked above, voc is not exist here - check_exist = name != 'voc' - for url, md5sum in dataset[0]: - get_path(url, data_dir, md5sum, check_exist) - - # voc should create list after download - if name == 'voc': - create_voc_list(data_dir) - return data_dir - - # not match any dataset in DATASETS - raise ValueError("Dataset {} is not valid and cannot parse dataset type " - "'{}' for automaticly downloading, which only supports " - "'voc', 'coco', 'wider_face' and 'fruit' currently".format( - path, osp.split(path)[-1])) - - -def create_voc_list(data_dir, devkit_subdir='VOCdevkit'): - logger.info("Create voc file list...") - devkit_dir = osp.join(data_dir, 
devkit_subdir) - years = ['2007', '2012'] - - # NOTE: since using auto download VOC - # dataset, VOC default label list should be used, - # do not generate label_list.txt here. For default - # label, see ../data/source/voc_loader.py - create_list(devkit_dir, years, data_dir) - logger.info("Create voc file list finished") - - -def map_path(url, root_dir): - # parse path after download to decompress under root_dir - fname = osp.split(url)[-1] - zip_formats = ['.zip', '.tar', '.gz'] - fpath = fname - for zip_format in zip_formats: - fpath = fpath.replace(zip_format, '') - return osp.join(root_dir, fpath) - - -def get_path(url, root_dir, md5sum=None, check_exist=True): - """ Download from given url to root_dir. - if file or directory specified by url is exists under - root_dir, return the path directly, otherwise download - from url and decompress it, return the path. - - url (str): download url - root_dir (str): root dir for downloading, it should be - WEIGHTS_HOME or DATASET_HOME - md5sum (str): md5 sum of download package - """ - # parse path after download to decompress under root_dir - fullpath = map_path(url, root_dir) - - # For same zip file, decompressed directory name different - # from zip file name, rename by following map - decompress_name_map = { - "VOCtrainval_11-May-2012": "VOCdevkit/VOC2012", - "VOCtrainval_06-Nov-2007": "VOCdevkit/VOC2007", - "VOCtest_06-Nov-2007": "VOCdevkit/VOC2007", - "annotations_trainval": "annotations" - } - for k, v in decompress_name_map.items(): - if fullpath.find(k) >= 0: - fullpath = osp.join(osp.split(fullpath)[0], v) - - exist_flag = False - if osp.exists(fullpath) and check_exist: - exist_flag = True - logger.info("Found {}".format(fullpath)) - else: - exist_flag = False - fullname = _download(url, root_dir, md5sum) - _decompress(fullname) - - return fullpath, exist_flag - - -def download_dataset(path, dataset=None): - if dataset not in DATASETS.keys(): - logger.error("Unknown dataset {}, it should be " - 
"{}".format(dataset, DATASETS.keys())) - return - dataset_info = DATASETS[dataset][0] - for info in dataset_info: - get_path(info[0], path, info[1], False) - logger.info("Download dataset {} finished.".format(dataset)) - - -def _dataset_exists(path, annotation, image_dir): - """ - Check if user define dataset exists - """ - if not osp.exists(path): - logger.info("Config dataset_dir {} is not exits, " - "dataset config is not valid".format(path)) - return False - - if annotation: - annotation_path = osp.join(path, annotation) - if not osp.isfile(annotation_path): - logger.info("Config annotation {} is not a " - "file, dataset config is not " - "valid".format(annotation_path)) - return False - if image_dir: - image_path = osp.join(path, image_dir) - if not osp.isdir(image_path): - logger.info("Config image_dir {} is not a " - "directory, dataset config is not " - "valid".format(image_path)) - return False - return True - - -def _download(url, path, md5sum=None): - """ - Download from url, save to path. - - url (str): download url - path (str): download to given path - """ - if not osp.exists(path): - os.makedirs(path) - - fname = osp.split(url)[-1] - fullname = osp.join(path, fname) - retry_cnt = 0 - - while not (osp.exists(fullname) and _md5check(fullname, md5sum)): - if retry_cnt < DOWNLOAD_RETRY_LIMIT: - retry_cnt += 1 - else: - raise RuntimeError("Download from {} failed. 
" - "Retry limit reached".format(url)) - - logger.info("Downloading {} from {}".format(fname, url)) - - req = requests.get(url, stream=True) - if req.status_code != 200: - raise RuntimeError("Downloading from {} failed with code " - "{}!".format(url, req.status_code)) - - # For protecting download interupted, download to - # tmp_fullname firstly, move tmp_fullname to fullname - # after download finished - tmp_fullname = fullname + "_tmp" - total_size = req.headers.get('content-length') - with open(tmp_fullname, 'wb') as f: - if total_size: - for chunk in tqdm.tqdm( - req.iter_content(chunk_size=1024), - total=(int(total_size) + 1023) // 1024, - unit='KB'): - f.write(chunk) - else: - for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - shutil.move(tmp_fullname, fullname) - - return fullname - - -def _md5check(fullname, md5sum=None): - if md5sum is None: - return True - - logger.info("File {} md5 checking...".format(fullname)) - md5 = hashlib.md5() - with open(fullname, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b""): - md5.update(chunk) - calc_md5sum = md5.hexdigest() - - if calc_md5sum != md5sum: - logger.info("File {} md5 check failed, {}(calc) != " - "{}(base)".format(fullname, calc_md5sum, md5sum)) - return False - return True - - -def _decompress(fname): - """ - Decompress for zip and tar file - """ - logger.info("Decompressing {}...".format(fname)) - - # For protecting decompressing interupted, - # decompress to fpath_tmp directory firstly, if decompress - # successed, move decompress files to fpath and delete - # fpath_tmp and remove download compress file. 
- fpath = osp.split(fname)[0] - fpath_tmp = osp.join(fpath, 'tmp') - if osp.isdir(fpath_tmp): - shutil.rmtree(fpath_tmp) - os.makedirs(fpath_tmp) - - if fname.find('tar') >= 0: - with tarfile.open(fname) as tf: - tf.extractall(path=fpath_tmp) - elif fname.find('zip') >= 0: - with zipfile.ZipFile(fname) as zf: - zf.extractall(path=fpath_tmp) - else: - raise TypeError("Unsupport compress file type {}".format(fname)) - - for f in os.listdir(fpath_tmp): - src_dir = osp.join(fpath_tmp, f) - dst_dir = osp.join(fpath, f) - _move_and_merge_tree(src_dir, dst_dir) - - shutil.rmtree(fpath_tmp) - os.remove(fname) - - -def _move_and_merge_tree(src, dst): - """ - Move src directory to dst, if dst is already exists, - merge src to dst - """ - if not osp.exists(dst): - shutil.move(src, dst) - else: - for fp in os.listdir(src): - src_fp = osp.join(src, fp) - dst_fp = osp.join(dst, fp) - if osp.isdir(src_fp): - if osp.isdir(dst_fp): - _move_and_merge_tree(src_fp, dst_fp) - else: - shutil.move(src_fp, dst_fp) - elif osp.isfile(src_fp) and \ - not osp.isfile(dst_fp): - shutil.move(src_fp, dst_fp) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py deleted file mode 100644 index dbded30197b46e12282d50991406a6585c0cb572..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/eval_utils.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import numpy as np -import os -import time - -import paddle.fluid as fluid - -from ppdet.utils.voc_eval import bbox_eval as voc_bbox_eval -from ppdet.utils.post_process import mstest_box_post_process, mstest_mask_post_process, box_flip - -__all__ = ['parse_fetches', 'eval_run', 'eval_results', 'json_eval_results'] - -logger = logging.getLogger(__name__) - - -def parse_fetches(fetches, prog=None, extra_keys=None): - """ - Parse fetch variable infos from model fetches, - values for fetch_list and keys for stat - """ - keys, values = [], [] - cls = [] - for k, v in fetches.items(): - if hasattr(v, 'name'): - keys.append(k) - v.persistable = True - values.append(v.name) - else: - cls.append(v) - - if prog is not None and extra_keys is not None: - for k in extra_keys: - try: - v = fluid.framework._get_var(k, prog) - keys.append(k) - values.append(v.name) - except Exception: - pass - - return keys, values, cls - - -def length2lod(length_lod): - offset_lod = [0] - for i in length_lod: - offset_lod.append(offset_lod[-1] + i) - return [offset_lod] - - -def get_sub_feed(input, place): - new_dict = {} - res_feed = {} - key_name = ['bbox', 'im_info', 'im_id', 'im_shape', 'bbox_flip'] - for k in key_name: - if k in input.keys(): - new_dict[k] = input[k] - for k in input.keys(): - if 'image' in k: - new_dict[k] = input[k] - for k, v in new_dict.items(): - data_t = fluid.LoDTensor() - data_t.set(v[0], place) - if 'bbox' in k: - lod = length2lod(v[1][0]) - data_t.set_lod(lod) - res_feed[k] = data_t - return res_feed - - -def clean_res(result, keep_name_list): - clean_result = {} - for k in result.keys(): - if k in keep_name_list: - clean_result[k] = result[k] - result.clear() - return clean_result - - -def eval_run(exe, - compile_program, - 
pyreader, - keys, - values, - cls, - cfg=None, - sub_prog=None, - sub_keys=None, - sub_values=None): - """ - Run evaluation program, return program outputs. - """ - iter_id = 0 - results = [] - if len(cls) != 0: - values = [] - for i in range(len(cls)): - _, accum_map = cls[i].get_map_var() - cls[i].reset(exe) - values.append(accum_map) - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - - try: - pyreader.start() - while True: - outs = exe.run(compile_program, - fetch_list=values, - return_numpy=False) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) - mask_multi_scale_test = multi_scale_test and 'Mask' in cfg.architecture - - if multi_scale_test: - post_res = mstest_box_post_process(res, cfg) - res.update(post_res) - if mask_multi_scale_test: - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - sub_feed = get_sub_feed(res, place) - sub_prog_outs = exe.run(sub_prog, - feed=sub_feed, - fetch_list=sub_values, - return_numpy=False) - sub_prog_res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(sub_keys, sub_prog_outs) - } - post_res = mstest_mask_post_process(sub_prog_res, cfg) - res.update(post_res) - if multi_scale_test: - res = clean_res( - res, ['im_info', 'bbox', 'im_id', 'im_shape', 'mask']) - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - except (StopIteration, fluid.core.EOFException): - pyreader.reset() - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. 
- format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def eval_results(results, - feed, - metric, - num_classes, - resolution=None, - is_bbox_normalized=False, - output_directory=None, - map_type='11point'): - """Evaluation for evaluation program results""" - box_ap_stats = [] - if metric == 'COCO': - from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval - anno_file = getattr(feed.dataset, 'annotation', None) - with_background = getattr(feed, 'with_background', True) - if 'proposal' in results[0]: - output = 'proposal.json' - if output_directory: - output = os.path.join(output_directory, 'proposal.json') - proposal_eval(results, anno_file, output) - if 'bbox' in results[0]: - output = 'bbox.json' - if output_directory: - output = os.path.join(output_directory, 'bbox.json') - - box_ap_stats = bbox_eval( - results, - anno_file, - output, - with_background, - is_bbox_normalized=is_bbox_normalized) - - if 'mask' in results[0]: - output = 'mask.json' - if output_directory: - output = os.path.join(output_directory, 'mask.json') - mask_eval(results, anno_file, output, resolution) - else: - if 'accum_map' in results[-1]: - res = np.mean(results[-1]['accum_map'][0]) - logger.info('mAP: {:.2f}'.format(res * 100.)) - box_ap_stats.append(res * 100.) 
- elif 'bbox' in results[0]: - box_ap = voc_bbox_eval( - results, - num_classes, - is_bbox_normalized=is_bbox_normalized, - map_type=map_type) - box_ap_stats.append(box_ap) - return box_ap_stats - - -def json_eval_results(feed, metric, json_directory=None): - """ - cocoapi eval with already exists proposal.json, bbox.json or mask.json - """ - assert metric == 'COCO' - from ppdet.utils.coco_eval import cocoapi_eval - anno_file = getattr(feed.dataset, 'annotation', None) - json_file_list = ['proposal.json', 'bbox.json', 'mask.json'] - if json_directory: - assert os.path.exists( - json_directory), "The json directory:{} does not exist".format( - json_directory) - for k, v in enumerate(json_file_list): - json_file_list[k] = os.path.join(str(json_directory), v) - - coco_eval_style = ['proposal', 'bbox', 'segm'] - for i, v_json in enumerate(json_file_list): - if os.path.exists(v_json): - cocoapi_eval(v_json, coco_eval_style[i], anno_file=anno_file) - else: - logger.info("{} not exists!".format(v_json)) diff --git a/PaddleCV/PaddleDetection/ppdet/utils/map_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/map_utils.py deleted file mode 100644 index fe0530596bb54920a81235ee6716e2d43918899a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/map_utils.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import numpy as np -import logging -logger = logging.getLogger(__name__) - -__all__ = ['bbox_area', 'jaccard_overlap', 'DetectionMAP'] - - -def bbox_area(bbox, is_bbox_normalized): - """ - Calculate area of a bounding box - """ - norm = 1. - float(is_bbox_normalized) - width = bbox[2] - bbox[0] + norm - height = bbox[3] - bbox[1] + norm - return width * height - - -def jaccard_overlap(pred, gt, is_bbox_normalized=False): - """ - Calculate jaccard overlap ratio between two bounding box - """ - if pred[0] >= gt[2] or pred[2] <= gt[0] or \ - pred[1] >= gt[3] or pred[3] <= gt[1]: - return 0. - inter_xmin = max(pred[0], gt[0]) - inter_ymin = max(pred[1], gt[1]) - inter_xmax = min(pred[2], gt[2]) - inter_ymax = min(pred[3], gt[3]) - inter_size = bbox_area([inter_xmin, inter_ymin, inter_xmax, inter_ymax], - is_bbox_normalized) - pred_size = bbox_area(pred, is_bbox_normalized) - gt_size = bbox_area(gt, is_bbox_normalized) - overlap = float(inter_size) / (pred_size + gt_size - inter_size) - return overlap - - -class DetectionMAP(object): - """ - Calculate detection mean average precision. - Currently support two types: 11point and integral - - Args: - class_num (int): the class number. - overlap_thresh (float): The threshold of overlap - ratio between prediction bounding box and - ground truth bounding box for deciding - true/false positive. Default 0.5. - map_type (str): calculation method of mean average - precision, currently support '11point' and - 'integral'. Default '11point'. - is_bbox_normalized (bool): whther bounding boxes - is normalized to range[0, 1]. Default False. - evaluate_difficult (bool): whether to evaluate - difficult bounding boxes. Default False. 
- """ - - def __init__(self, - class_num, - overlap_thresh=0.5, - map_type='11point', - is_bbox_normalized=False, - evaluate_difficult=False): - self.class_num = class_num - self.overlap_thresh = overlap_thresh - assert map_type in ['11point', 'integral'], \ - "map_type currently only support '11point' "\ - "and 'integral'" - self.map_type = map_type - self.is_bbox_normalized = is_bbox_normalized - self.evaluate_difficult = evaluate_difficult - self.reset() - - def update(self, bbox, gt_box, gt_label, difficult=None): - """ - Update metric statics from given prediction and ground - truth infomations. - """ - if difficult is None: - difficult = np.zeros_like(gt_label) - - # record class gt count - for gtl, diff in zip(gt_label, difficult): - if self.evaluate_difficult or int(diff) == 0: - self.class_gt_counts[int(np.array(gtl))] += 1 - - # record class score positive - visited = [False] * len(gt_label) - for b in bbox: - label, score, xmin, ymin, xmax, ymax = b.tolist() - pred = [xmin, ymin, xmax, ymax] - max_idx = -1 - max_overlap = -1.0 - for i, gl in enumerate(gt_label): - if int(gl) == int(label): - overlap = jaccard_overlap(pred, gt_box[i], - self.is_bbox_normalized) - if overlap > max_overlap: - max_overlap = overlap - max_idx = i - - if max_overlap > self.overlap_thresh: - if self.evaluate_difficult or \ - int(np.array(difficult[max_idx])) == 0: - if not visited[max_idx]: - self.class_score_poss[int(label)].append([score, 1.0]) - visited[max_idx] = True - else: - self.class_score_poss[int(label)].append([score, 0.0]) - else: - self.class_score_poss[int(label)].append([score, 0.0]) - - def reset(self): - """ - Reset metric statics - """ - self.class_score_poss = [[] for _ in range(self.class_num)] - self.class_gt_counts = [0] * self.class_num - self.mAP = None - - def accumulate(self): - """ - Accumulate metric results and calculate mAP - """ - mAP = 0. 
- valid_cnt = 0 - for score_pos, count in zip(self.class_score_poss, - self.class_gt_counts): - if count == 0 or len(score_pos) == 0: - continue - - accum_tp_list, accum_fp_list = \ - self._get_tp_fp_accum(score_pos) - precision = [] - recall = [] - for ac_tp, ac_fp in zip(accum_tp_list, accum_fp_list): - precision.append(float(ac_tp) / (ac_tp + ac_fp)) - recall.append(float(ac_tp) / count) - - if self.map_type == '11point': - max_precisions = [0.] * 11 - start_idx = len(precision) - 1 - for j in range(10, -1, -1): - for i in range(start_idx, -1, -1): - if recall[i] < float(j) / 10.: - start_idx = i - if j > 0: - max_precisions[j - 1] = max_precisions[j] - break - else: - if max_precisions[j] < precision[i]: - max_precisions[j] = precision[i] - mAP += sum(max_precisions) / 11. - valid_cnt += 1 - elif self.map_type == 'integral': - import math - ap = 0. - prev_recall = 0. - for i in range(len(precision)): - recall_gap = math.fabs(recall[i] - prev_recall) - if recall_gap > 1e-6: - ap += precision[i] * recall_gap - prev_recall = recall[i] - mAP += ap - valid_cnt += 1 - else: - logger.error("Unspported mAP type {}".format(self.map_type)) - sys.exit(1) - - self.mAP = mAP / float(valid_cnt) if valid_cnt > 0 else mAP - - def get_map(self): - """ - Get mAP result - """ - if self.mAP is None: - logger.error("mAP is not calculated.") - return self.mAP - - def _get_tp_fp_accum(self, score_pos_list): - """ - Calculate accumulating true/false positive results from - [score, pos] records - """ - sorted_list = sorted(score_pos_list, key=lambda s: s[0], reverse=True) - accum_tp = 0 - accum_fp = 0 - accum_tp_list = [] - accum_fp_list = [] - for (score, pos) in sorted_list: - accum_tp += int(pos) - accum_tp_list.append(accum_tp) - accum_fp += 1 - int(pos) - accum_fp_list.append(accum_fp) - return accum_tp_list, accum_fp_list diff --git a/PaddleCV/PaddleDetection/ppdet/utils/post_process.py b/PaddleCV/PaddleDetection/ppdet/utils/post_process.py deleted file mode 100644 index 
cc80bc186626c4e983328ffd53f53d217230a244..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/post_process.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import logging -import numpy as np - -import paddle.fluid as fluid - -__all__ = ['nms'] - -logger = logging.getLogger(__name__) - - -def box_flip(boxes, im_shape): - im_width = im_shape[0][1] - flipped_boxes = boxes.copy() - - flipped_boxes[:, 0::4] = im_width - boxes[:, 2::4] - 1 - flipped_boxes[:, 2::4] = im_width - boxes[:, 0::4] - 1 - return flipped_boxes - - -def nms(dets, thresh): - """Apply classic DPM-style greedy NMS.""" - if dets.shape[0] == 0: - return [] - scores = dets[:, 0] - x1 = dets[:, 1] - y1 = dets[:, 2] - x2 = dets[:, 3] - y2 = dets[:, 4] - - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - ndets = dets.shape[0] - suppressed = np.zeros((ndets), dtype=np.int) - - # nominal indices - # _i, _j - # sorted indices - # i, j - # temp variables for box i's (the box currently under consideration) - # ix1, iy1, ix2, iy2, iarea - - # variables for computing overlap with box j (lower scoring box) - # xx1, yy1, xx2, yy2 - # w, h - # inter, ovr - - for _i in range(ndets): - i = order[_i] - if suppressed[i] == 1: - continue - ix1 = x1[i] - 
iy1 = y1[i] - ix2 = x2[i] - iy2 = y2[i] - iarea = areas[i] - for _j in range(_i + 1, ndets): - j = order[_j] - if suppressed[j] == 1: - continue - xx1 = max(ix1, x1[j]) - yy1 = max(iy1, y1[j]) - xx2 = min(ix2, x2[j]) - yy2 = min(iy2, y2[j]) - w = max(0.0, xx2 - xx1 + 1) - h = max(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (iarea + areas[j] - inter) - if ovr >= thresh: - suppressed[j] = 1 - - return np.where(suppressed == 0)[0] - - -def bbox_area(box): - w = box[2] - box[0] + 1 - h = box[3] - box[1] + 1 - return w * h - - -def bbox_overlaps(x, y): - N = x.shape[0] - K = y.shape[0] - overlaps = np.zeros((N, K), dtype=np.float32) - for k in range(K): - y_area = bbox_area(y[k]) - for n in range(N): - iw = min(x[n, 2], y[k, 2]) - max(x[n, 0], y[k, 0]) + 1 - if iw > 0: - ih = min(x[n, 3], y[k, 3]) - max(x[n, 1], y[k, 1]) + 1 - if ih > 0: - x_area = bbox_area(x[n]) - ua = x_area + y_area - iw * ih - overlaps[n, k] = iw * ih / ua - return overlaps - - -def box_voting(nms_dets, dets, vote_thresh): - top_dets = nms_dets.copy() - top_boxes = nms_dets[:, 1:] - all_boxes = dets[:, 1:] - all_scores = dets[:, 0] - top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes) - for k in range(nms_dets.shape[0]): - inds_to_vote = np.where(top_to_all_overlaps[k] >= vote_thresh)[0] - boxes_to_vote = all_boxes[inds_to_vote, :] - ws = all_scores[inds_to_vote] - top_dets[k, 1:] = np.average(boxes_to_vote, axis=0, weights=ws) - - return top_dets - - -def get_nms_result(boxes, scores, cfg): - cls_boxes = [[] for _ in range(cfg.num_classes)] - for j in range(1, cfg.num_classes): - inds = np.where(scores[:, j] > cfg.MultiScaleTEST['score_thresh'])[0] - scores_j = scores[inds, j] - boxes_j = boxes[inds, j * 4:(j + 1) * 4] - dets_j = np.hstack((scores_j[:, np.newaxis], boxes_j)).astype( - np.float32, copy=False) - keep = nms(dets_j, cfg.MultiScaleTEST['nms_thresh']) - nms_dets = dets_j[keep, :] - if cfg.MultiScaleTEST['enable_voting']: - nms_dets = box_voting(nms_dets, dets_j, - 
cfg.MultiScaleTEST['vote_thresh']) - #add labels - label = np.array([j for _ in range(len(keep))]) - nms_dets = np.hstack((label[:, np.newaxis], nms_dets)).astype( - np.float32, copy=False) - cls_boxes[j] = nms_dets - # Limit to max_per_image detections **over all classes** - image_scores = np.hstack( - [cls_boxes[j][:, 1] for j in range(1, cfg.num_classes)]) - if len(image_scores) > cfg.MultiScaleTEST['detections_per_im']: - image_thresh = np.sort(image_scores)[-cfg.MultiScaleTEST[ - 'detections_per_im']] - for j in range(1, cfg.num_classes): - keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] - cls_boxes[j] = cls_boxes[j][keep, :] - - im_results = np.vstack([cls_boxes[j] for j in range(1, cfg.num_classes)]) - return im_results - - -def mstest_box_post_process(result, cfg): - """ - Multi-scale Test - Only available for batch_size=1 now. - """ - post_bbox = {} - use_flip = False - ms_boxes = [] - ms_scores = [] - im_shape = result['im_shape'][0] - for k in result.keys(): - if 'bbox' in k: - boxes = result[k][0] - boxes = np.reshape(boxes, (-1, 4 * cfg.num_classes)) - scores = result['score' + k[4:]][0] - if 'flip' in k: - boxes = box_flip(boxes, im_shape) - use_flip = True - ms_boxes.append(boxes) - ms_scores.append(scores) - - ms_boxes = np.concatenate(ms_boxes) - ms_scores = np.concatenate(ms_scores) - bbox_pred = get_nms_result(ms_boxes, ms_scores, cfg) - post_bbox.update({'bbox': (bbox_pred, [[len(bbox_pred)]])}) - if use_flip: - bbox = bbox_pred[:, 2:] - bbox_flip = np.append( - bbox_pred[:, :2], box_flip(bbox, im_shape), axis=1) - post_bbox.update({'bbox_flip': (bbox_flip, [[len(bbox_flip)]])}) - return post_bbox - - -def mstest_mask_post_process(result, cfg): - mask_list = [] - im_shape = result['im_shape'][0] - M = cfg.FPNRoIAlign['mask_resolution'] - for k in result.keys(): - if 'mask' in k: - masks = result[k][0] - if len(masks.shape) != 4: - masks = np.zeros((0, M, M)) - mask_list.append(masks) - continue - if 'flip' in k: - masks = masks[:, :, :, 
::-1] - mask_list.append(masks) - - mask_pred = np.mean(mask_list, axis=0) - return {'mask': (mask_pred, [[len(mask_pred)]])} diff --git a/PaddleCV/PaddleDetection/ppdet/utils/stats.py b/PaddleCV/PaddleDetection/ppdet/utils/stats.py deleted file mode 100644 index 4d7e36babf8d53170162cfd5581f591e376ec8cd..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/stats.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import collections -import numpy as np -import datetime - -__all__ = ['TrainingStats', 'Time'] - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. 
- """ - - def __init__(self, window_size): - self.deque = collections.deque(maxlen=window_size) - - def add_value(self, value): - self.deque.append(value) - - def get_median_value(self): - return np.median(self.deque) - - -def Time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') - - -class TrainingStats(object): - def __init__(self, window_size, stats_keys): - self.smoothed_losses_and_metrics = { - key: SmoothedValue(window_size) - for key in stats_keys - } - - def update(self, stats): - for k, v in self.smoothed_losses_and_metrics.items(): - v.add_value(stats[k]) - - def get(self, extras=None): - stats = collections.OrderedDict() - if extras: - for k, v in extras.items(): - stats[k] = v - for k, v in self.smoothed_losses_and_metrics.items(): - stats[k] = format(v.get_median_value(), '.6f') - - return stats - - def log(self, extras=None): - d = self.get(extras) - strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items()) - return strs diff --git a/PaddleCV/PaddleDetection/ppdet/utils/visualizer.py b/PaddleCV/PaddleDetection/ppdet/utils/visualizer.py deleted file mode 100644 index ff35dc2d577fcaff042583d63c709a498dadb1da..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/visualizer.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import pycocotools.mask as mask_util -from PIL import Image, ImageDraw - -from .colormap import colormap - -__all__ = ['visualize_results'] - - -def visualize_results(image, - im_id, - catid2name, - threshold=0.5, - bbox_results=None, - mask_results=None): - """ - Visualize bbox and mask results - """ - if mask_results: - image = draw_mask(image, im_id, mask_results, threshold) - if bbox_results: - image = draw_bbox(image, im_id, catid2name, bbox_results, - threshold) - return image - - -def draw_mask(image, im_id, segms, threshold, alpha=0.7): - """ - Draw mask on image - """ - mask_color_id = 0 - w_ratio = .4 - color_list = colormap(rgb=True) - img_array = np.array(image).astype('float32') - for dt in np.array(segms): - if im_id != dt['image_id']: - continue - segm, score = dt['segmentation'], dt['score'] - if score < threshold: - continue - mask = mask_util.decode(segm) * 255 - color_mask = color_list[mask_color_id % len(color_list), 0:3] - mask_color_id += 1 - for c in range(3): - color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255 - idx = np.nonzero(mask) - img_array[idx[0], idx[1], :] *= 1.0 - alpha - img_array[idx[0], idx[1], :] += alpha * color_mask - return Image.fromarray(img_array.astype('uint8')) - - -def draw_bbox(image, im_id, catid2name, bboxes, threshold): - """ - Draw bbox on image - """ - draw = ImageDraw.Draw(image) - - catid2color = {} - color_list = colormap(rgb=True)[:40] - for dt in np.array(bboxes): - if im_id != dt['image_id']: - continue - catid, bbox, score = dt['category_id'], dt['bbox'], dt['score'] - if score < threshold: - continue - - xmin, ymin, w, h = bbox - xmax = xmin + w - ymax = ymin + h - - if catid not in catid2color: - idx = np.random.randint(len(color_list)) - catid2color[catid] = color_list[idx] - color = tuple(catid2color[catid]) - - # 
draw bbox - draw.line( - [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), - (xmin, ymin)], - width=2, - fill=color) - - # draw label - text = "{} {:.2f}".format(catid2name[catid], score) - tw, th = draw.textsize(text) - draw.rectangle([(xmin + 1, ymin - th), - (xmin + tw + 1, ymin)], - fill=color) - draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) - - return image diff --git a/PaddleCV/PaddleDetection/ppdet/utils/voc_eval.py b/PaddleCV/PaddleDetection/ppdet/utils/voc_eval.py deleted file mode 100644 index 67e66e4c683aaf7d5af2fd352817daf1ad3576d4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/voc_eval.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import sys -import numpy as np - -from ..data.source.voc_loader import pascalvoc_label -from .map_utils import DetectionMAP -from .coco_eval import bbox2out - -import logging -logger = logging.getLogger(__name__) - -__all__ = [ - 'bbox_eval', 'bbox2out', 'get_category_info' -] - - -def bbox_eval(results, - class_num, - overlap_thresh=0.5, - map_type='11point', - is_bbox_normalized=False, - evaluate_difficult=False): - """ - Bounding box evaluation for VOC dataset - - Args: - results (list): prediction bounding box results. - class_num (int): evaluation class number. - overlap_thresh (float): the postive threshold of - bbox overlap - map_type (string): method for mAP calcualtion, - can only be '11point' or 'integral' - is_bbox_normalized (bool): whether bbox is normalized - to range [0, 1]. - evaluate_difficult (bool): whether to evaluate - difficult gt bbox. 
- """ - assert 'bbox' in results[0] - logger.info("Start evaluate...") - - detection_map = DetectionMAP(class_num=class_num, - overlap_thresh=overlap_thresh, - map_type=map_type, - is_bbox_normalized=is_bbox_normalized, - evaluate_difficult=evaluate_difficult) - - for t in results: - bboxes = t['bbox'][0] - bbox_lengths = t['bbox'][1][0] - - if bboxes.shape == (1, 1) or bboxes is None: - continue - - gt_boxes = t['gt_box'][0] - gt_labels = t['gt_label'][0] - difficults = t['is_difficult'][0] if not evaluate_difficult \ - else None - - if len(t['gt_box'][1]) == 0: - # gt_box, gt_label, difficult read as zero padded Tensor - bbox_idx = 0 - for i in range(len(gt_boxes)): - gt_box = gt_boxes[i] - gt_label = gt_labels[i] - difficult = None if difficults is None \ - else difficults[i] - bbox_num = bbox_lengths[i] - bbox = bboxes[bbox_idx: bbox_idx + bbox_num] - gt_box, gt_label, difficult = prune_zero_padding( - gt_box, gt_label, difficult) - detection_map.update(bbox, gt_box, gt_label, difficult) - bbox_idx += bbox_num - else: - # gt_box, gt_label, difficult read as LoDTensor - gt_box_lengths = t['gt_box'][1][0] - bbox_idx = 0 - gt_box_idx = 0 - for i in range(len(bbox_lengths)): - bbox_num = bbox_lengths[i] - gt_box_num = gt_box_lengths[i] - bbox = bboxes[bbox_idx: bbox_idx + bbox_num] - gt_box = gt_boxes[gt_box_idx: gt_box_idx + gt_box_num] - gt_label = gt_labels[gt_box_idx: gt_box_idx + gt_box_num] - difficult = None if difficults is None else \ - difficults[gt_box_idx: gt_box_idx + gt_box_num] - detection_map.update(bbox, gt_box, gt_label, difficult) - bbox_idx += bbox_num - gt_box_idx += gt_box_num - - logger.info("Accumulating evaluatation results...") - detection_map.accumulate() - map_stat = 100. 
* detection_map.get_map() - logger.info("mAP({:.2f}, {}) = {:.2f}".format(overlap_thresh, - map_type, map_stat)) - return map_stat - - -def prune_zero_padding(gt_box, gt_label, difficult=None): - valid_cnt = 0 - for i in range(len(gt_box)): - if gt_box[i, 0] == 0 and gt_box[i, 1] == 0 and \ - gt_box[i, 2] == 0 and gt_box[i, 3] == 0: - break - valid_cnt += 1 - return (gt_box[:valid_cnt], gt_label[:valid_cnt], - difficult[:valid_cnt] if difficult is not None else None) - - -def get_category_info(anno_file=None, - with_background=True, - use_default_label=False): - if use_default_label or anno_file is None \ - or not os.path.exists(anno_file): - logger.info("Not found annotation file {}, load " - "voc2012 categories.".format(anno_file)) - return vocall_category_info(with_background) - else: - logger.info("Load categories from {}".format(anno_file)) - return get_category_info_from_anno(anno_file, with_background) - - -def get_category_info_from_anno(anno_file, with_background=True): - """ - Get class id to category id map and category id - to category name map from annotation file. - - Args: - anno_file (str): annotation file path - with_background (bool, default True): - whether load background as class 0. - """ - cats = [] - with open(anno_file) as f: - for line in f.readlines(): - cats.append(line.strip()) - - if cats[0] != 'background' and with_background: - cats.insert(0, 'background') - if cats[0] == 'background' and not with_background: - cats = cats[1:] - - clsid2catid = {i: i for i in range(len(cats))} - catid2name = {i: name for i, name in enumerate(cats)} - - return clsid2catid, catid2name - - -def vocall_category_info(with_background=True): - """ - Get class id to category id map and category id - to category name map of mixup voc dataset - - Args: - with_background (bool, default True): - whether load background as class 0. 
- """ - label_map = pascalvoc_label(with_background) - label_map = sorted(label_map.items(), key=lambda x: x[1]) - cats = [l[0] for l in label_map] - - if with_background: - cats.insert(0, 'background') - - clsid2catid = {i: i for i in range(len(cats))} - catid2name = {i: name for i, name in enumerate(cats)} - - return clsid2catid, catid2name diff --git a/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py deleted file mode 100644 index 88252e2351164cf3402e94a2c0e6f49277d7736b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import os.path as osp -import re -import random -import shutil - -__all__ = ['create_list'] - - -def create_list(devkit_dir, years, output_dir): - """ - create following list: - 1. trainval.txt - 2. 
test.txt - """ - trainval_list = [] - test_list = [] - for year in years: - trainval, test = _walk_voc_dir(devkit_dir, year, output_dir) - trainval_list.extend(trainval) - test_list.extend(test) - - random.shuffle(trainval_list) - with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval: - for item in trainval_list: - ftrainval.write(item[0] + ' ' + item[1] + '\n') - - with open(osp.join(output_dir, 'test.txt'), 'w') as fval: - ct = 0 - for item in test_list: - ct += 1 - fval.write(item[0] + ' ' + item[1] + '\n') - - -def _get_voc_dir(devkit_dir, year, type): - return osp.join(devkit_dir, 'VOC' + year, type) - - -def _walk_voc_dir(devkit_dir, year, output_dir): - filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main') - annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations') - img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages') - trainval_list = [] - test_list = [] - added = set() - - for _, _, files in os.walk(filelist_dir): - for fname in files: - img_ann_list = [] - if re.match('[a-z]+_trainval\.txt', fname): - img_ann_list = trainval_list - elif re.match('[a-z]+_test\.txt', fname): - img_ann_list = test_list - else: - continue - fpath = osp.join(filelist_dir, fname) - for line in open(fpath): - name_prefix = line.strip().split()[0] - if name_prefix in added: - continue - added.add(name_prefix) - ann_path = osp.join(osp.relpath(annotation_dir, output_dir), - name_prefix + '.xml') - img_path = osp.join(osp.relpath(img_dir, output_dir), - name_prefix + '.jpg') - img_ann_list.append((img_path, ann_path)) - - return trainval_list, test_list diff --git a/PaddleCV/PaddleDetection/ppdet/utils/widerface_eval_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/widerface_eval_utils.py deleted file mode 100644 index a19cd08350e7be9b3c7db8aa45e95f86b7088bf5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/ppdet/utils/widerface_eval_utils.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import numpy as np - -from ppdet.data.source.widerface_loader import widerface_label -from ppdet.utils.coco_eval import bbox2out - -import logging -logger = logging.getLogger(__name__) - -__all__ = [ - 'get_shrink', 'bbox_vote', 'save_widerface_bboxes', 'save_fddb_bboxes', - 'to_chw_bgr', 'bbox2out', 'get_category_info' -] - - -def to_chw_bgr(image): - """ - Transpose image from HWC to CHW and from RBG to BGR. - Args: - image (np.array): an image with HWC and RBG layout. 
- """ - # HWC to CHW - if len(image.shape) == 3: - image = np.swapaxes(image, 1, 2) - image = np.swapaxes(image, 1, 0) - # RBG to BGR - image = image[[2, 1, 0], :, :] - return image - - -def bbox_vote(det): - order = det[:, 4].ravel().argsort()[::-1] - det = det[order, :] - if det.shape[0] == 0: - dets = np.array([[10, 10, 20, 20, 0.002]]) - det = np.empty(shape=[0, 5]) - while det.shape[0] > 0: - # IOU - area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1) - xx1 = np.maximum(det[0, 0], det[:, 0]) - yy1 = np.maximum(det[0, 1], det[:, 1]) - xx2 = np.minimum(det[0, 2], det[:, 2]) - yy2 = np.minimum(det[0, 3], det[:, 3]) - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - o = inter / (area[0] + area[:] - inter) - - # nms - merge_index = np.where(o >= 0.3)[0] - det_accu = det[merge_index, :] - det = np.delete(det, merge_index, 0) - if merge_index.shape[0] <= 1: - if det.shape[0] == 0: - try: - dets = np.row_stack((dets, det_accu)) - except: - dets = det_accu - continue - det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4)) - max_score = np.max(det_accu[:, 4]) - det_accu_sum = np.zeros((1, 5)) - det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], - axis=0) / np.sum(det_accu[:, -1:]) - det_accu_sum[:, 4] = max_score - try: - dets = np.row_stack((dets, det_accu_sum)) - except: - dets = det_accu_sum - dets = dets[0:750, :] - # Only keep 0.3 or more - keep_index = np.where(dets[:, 4] >= 0.01)[0] - dets = dets[keep_index, :] - return dets - - -def get_shrink(height, width): - """ - Args: - height (int): image height. - width (int): image width. - """ - # avoid out of memory - max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5 - max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5 - - def get_round(x, loc): - str_x = str(x) - if '.' in str_x: - str_before, str_after = str_x.split('.') - len_after = len(str_after) - if len_after >= 3: - str_final = str_before + '.' 
+ str_after[0:loc] - return float(str_final) - else: - return x - - max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3 - if max_shrink >= 1.5 and max_shrink < 2: - max_shrink = max_shrink - 0.1 - elif max_shrink >= 2 and max_shrink < 3: - max_shrink = max_shrink - 0.2 - elif max_shrink >= 3 and max_shrink < 4: - max_shrink = max_shrink - 0.3 - elif max_shrink >= 4 and max_shrink < 5: - max_shrink = max_shrink - 0.4 - elif max_shrink >= 5: - max_shrink = max_shrink - 0.5 - - shrink = max_shrink if max_shrink < 1 else 1 - return shrink, max_shrink - - -def save_widerface_bboxes(image_path, bboxes_scores, output_dir): - image_name = image_path.split('/')[-1] - image_class = image_path.split('/')[-2] - odir = os.path.join(output_dir, image_class) - if not os.path.exists(odir): - os.makedirs(odir) - - ofname = os.path.join(odir, '%s.txt' % (image_name[:-4])) - f = open(ofname, 'w') - f.write('{:s}\n'.format(image_class + '/' + image_name)) - f.write('{:d}\n'.format(bboxes_scores.shape[0])) - for box_score in bboxes_scores: - xmin, ymin, xmax, ymax, score = box_score - f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, ( - xmax - xmin + 1), (ymax - ymin + 1), score)) - f.close() - logger.info("The predicted result is saved as {}".format(ofname)) - - -def save_fddb_bboxes(bboxes_scores, - output_dir, - output_fname='pred_fddb_res.txt'): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - predict_file = os.path.join(output_dir, output_fname) - f = open(predict_file, 'w') - for image_path, dets in bboxes_scores.iteritems(): - f.write('{:s}\n'.format(image_path)) - f.write('{:d}\n'.format(dets.shape[0])) - for box_score in dets: - xmin, ymin, xmax, ymax, score = box_score - width, height = xmax - xmin, ymax - ymin - f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n' - .format(xmin, ymin, width, height, score)) - logger.info("The predicted result is saved as {}".format(predict_file)) - return predict_file - - -def 
get_category_info(anno_file=None, - with_background=True, - use_default_label=False): - if use_default_label or anno_file is None \ - or not os.path.exists(anno_file): - logger.info("Not found annotation file {}, load " - "wider-face categories.".format(anno_file)) - return widerfaceall_category_info(with_background) - else: - logger.info("Load categories from {}".format(anno_file)) - return get_category_info_from_anno(anno_file, with_background) - - -def get_category_info_from_anno(anno_file, with_background=True): - """ - Get class id to category id map and category id - to category name map from annotation file. - Args: - anno_file (str): annotation file path - with_background (bool, default True): - whether load background as class 0. - """ - cats = [] - with open(anno_file) as f: - for line in f.readlines(): - cats.append(line.strip()) - - if cats[0] != 'background' and with_background: - cats.insert(0, 'background') - if cats[0] == 'background' and not with_background: - cats = cats[1:] - - clsid2catid = {i: i for i in range(len(cats))} - catid2name = {i: name for i, name in enumerate(cats)} - - return clsid2catid, catid2name - - -def widerfaceall_category_info(with_background=True): - """ - Get class id to category id map and category id - to category name map of mixup wider_face dataset - - Args: - with_background (bool, default True): - whether load background as class 0. 
- """ - label_map = widerface_label(with_background) - label_map = sorted(label_map.items(), key=lambda x: x[1]) - cats = [l[0] for l in label_map] - - if with_background: - cats.insert(0, 'background') - - clsid2catid = {i: i for i in range(len(cats))} - catid2name = {i: name for i, name in enumerate(cats)} - - return clsid2catid, catid2name diff --git a/PaddleCV/PaddleDetection/requirements.txt b/PaddleCV/PaddleDetection/requirements.txt deleted file mode 100644 index 798b006ad7baa3b4f24b4a7db15a63d6d2e533f4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tqdm -docstring_parser @ http://github.com/willthefrog/docstring_parser/tarball/master -typeguard ; python_version >= '3.4' -tb-paddle -tb-nightly diff --git a/PaddleCV/PaddleDetection/slim/distillation/README.md b/PaddleCV/PaddleDetection/slim/distillation/README.md deleted file mode 100755 index e46e6a2c92ac502f48d7d929a81b61228ed10d7a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/distillation/README.md +++ /dev/null @@ -1,141 +0,0 @@ ->运行该示例前请安装Paddle1.6或更高版本 - -# 检测模型蒸馏示例 - -## 概述 - -该示例使用PaddleSlim提供的[蒸馏策略](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/tutorial.md#3-蒸馏)对检测库中的模型进行蒸馏训练。 -在阅读该示例前,建议您先了解以下内容: - -- [检测库的常规训练方法](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection) -- [PaddleSlim使用文档](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) - - -## 配置文件说明 - -关于配置文件如何编写您可以参考: - -- [PaddleSlim配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#122-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E7%9A%84%E4%BD%BF%E7%94%A8) -- [蒸馏策略配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#23-蒸馏) - -这里以ResNet34-YoloV3蒸馏MobileNetV1-YoloV3模型为例,首先,为了对`student model`和`teacher model`有个总体的认识,从而进一步确认蒸馏的对象,我们通过以下命令分别观察两个网络变量(Variable)的名称和形状: - -```python -# 观察student model的Variable -for v 
in fluid.default_main_program().list_vars(): - if "py_reader" not in v.name and "double_buffer" not in v.name and "generated_var" not in v.name: - print(v.name, v.shape) -# 观察teacher model的Variable -for v in teacher_program.list_vars(): - print(v.name, v.shape) -``` - -经过对比可以发现,`student model`和`teacher model`的部分中间结果分别为: - -```bash -# student model -conv2d_15.tmp_0 -# teacher model -teacher_teacher_conv2d_1.tmp_0 -``` - - -所以,我们用`l2_distiller`对这两个特征图做蒸馏。在配置文件中进行如下配置: - -```yaml -distillers: - l2_distiller: - class: 'L2Distiller' - teacher_feature_map: 'teacher_teacher_conv2d_1.tmp_0' - student_feature_map: 'conv2d_15.tmp_0' - distillation_loss_weight: 1 -strategies: - distillation_strategy: - class: 'DistillationStrategy' - distillers: ['l2_distiller'] - start_epoch: 0 - end_epoch: 270 -``` - -我们也可以根据上述操作为蒸馏策略选择其他loss,PaddleSlim支持的有`FSP_loss`, `L2_loss`和`softmax_with_cross_entropy_loss` 。 - -## 训练 - -根据[PaddleDetection/tools/train.py](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/tools/train.py)编写压缩脚本compress.py。 -在该脚本中定义了Compressor对象,用于执行压缩任务。 - - - - -您可以通过运行脚本`run.sh`运行该示例。 - - -### 保存断点(checkpoint) - -如果在配置文件中设置了`checkpoint_path`, 则在蒸馏任务执行过程中会自动保存断点,当任务异常中断时, -重启任务会自动从`checkpoint_path`路径下按数字顺序加载最新的checkpoint文件。如果不想让重启的任务从断点恢复, -需要修改配置文件中的`checkpoint_path`,或者将`checkpoint_path`路径下文件清空。 - ->注意:配置文件中的信息不会保存在断点中,重启前对配置文件的修改将会生效。 - - -## 评估 - -如果在配置文件中设置了`checkpoint_path`,则每个epoch会保存一个压缩后的用于评估的模型, -该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__`和`__params__`两个文件。 -其中,`__model__`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。 - -如果不需要保存评估模型,可以在定义Compressor对象时,将`save_eval_model`选项设置为False(默认为True)。 - -运行命令为: -``` -python ../eval.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --model_name __model__ \ - --params_name __params__ \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" -``` - -## 预测 - -如果在配置文件中设置了`checkpoint_path`,并且在定义Compressor对象时指定了`prune_infer_model`选项,则每个epoch都会 
-保存一个`inference model`。该模型是通过删除eval_program中多余的operators而得到的。 - -该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__.infer`和`__params__`两个文件。 -其中,`__model__.infer`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。 - -更多关于`prune_infer_model`选项的介绍,请参考:[Compressor介绍](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#121-%E5%A6%82%E4%BD%95%E6%94%B9%E5%86%99%E6%99%AE%E9%80%9A%E8%AE%AD%E7%BB%83%E8%84%9A%E6%9C%AC) - -### python预测 - -在脚本slim/infer.py中展示了如何使用fluid python API加载使用预测模型进行预测。 - -运行命令为: -``` -python ../infer.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --model_name __model__.infer \ - --params_name __params__ \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - --infer_dir ../../demo -``` - -### PaddleLite - -该示例中产出的预测(inference)模型可以直接用PaddleLite进行加载使用。 -关于PaddleLite如何使用,请参考:[PaddleLite使用文档](https://github.com/PaddlePaddle/Paddle-Lite/wiki#%E4%BD%BF%E7%94%A8) - -## 示例结果 - ->当前release的结果并非超参调优后的最好结果,仅做示例参考,后续我们会优化当前结果。 - -### MobileNetV1-YOLO-V3 - -| FLOPS |Box AP| -|---|---| -|baseline|76.2 | -|蒸馏后|76.27 | - - -## FAQ diff --git a/PaddleCV/PaddleDetection/slim/distillation/compress.py b/PaddleCV/PaddleDetection/slim/distillation/compress.py deleted file mode 100644 index 6dc4fff9742ec258b30cad4408b5e6f428273495..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/distillation/compress.py +++ /dev/null @@ -1,324 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np -from collections import deque, OrderedDict -from paddle.fluid.contrib.slim.core import Compressor -from paddle.fluid.framework import IrGraph - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -import sys -sys.path.append("../../") -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader -from ppdet.utils.eval_utils import parse_fetches, eval_results -from ppdet.utils.stats import TrainingStats -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def eval_run(exe, compile_program, reader, keys, values, cls, test_feed): - """ - Run evaluation program, return program outputs. 
- """ - iter_id = 0 - results = [] - if len(cls) != 0: - values = [] - for i in range(len(cls)): - _, accum_map = cls[i].get_map_var() - cls[i].reset(exe) - values.append(accum_map) - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - for data in reader(): - data = test_feed.feed(data) - feed_data = {'image': data['image'], 'im_size': data['im_size']} - outs = exe.run(compile_program, - feed=feed_data, - fetch_list=[values[0]], - return_numpy=False) - outs.append(data['gt_box']) - outs.append(data['gt_label']) - outs.append(data['is_difficult']) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. 
- format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def main(): - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - if 'train_feed' not in cfg: - train_feed = create(main_arch + 'TrainFeed') - else: - train_feed = create(cfg.train_feed) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - lr_builder = create('LearningRate') - optim_builder = create('OptimizerBuilder') - - # build program - model = create(main_arch) - _, train_feed_vars = create_feed(train_feed, False) - train_fetches = model.train(train_feed_vars) - loss = train_fetches['loss'] - lr = lr_builder() - opt = optim_builder(lr) - opt.minimize(loss) - #for v in fluid.default_main_program().list_vars(): - # if "py_reader" not in v.name and "double_buffer" not in v.name and "generated_var" not in v.name: - # print(v.name, v.shape) - - cfg.max_iters = 258 - train_reader = create_reader(train_feed, cfg.max_iters, FLAGS.dataset_dir) - - exe.run(fluid.default_startup_program()) - - # parse train fetches - train_keys, train_values, _ = parse_fetches(train_fetches) - train_keys.append('lr') - train_values.append(lr.name) - - train_fetch_list = [] - for k, v in zip(train_keys, train_values): - train_fetch_list.append((k, v)) - print("train_fetch_list: {}".format(train_fetch_list)) - - eval_prog = fluid.Program() - 
startup_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - _, test_feed_vars = create_feed(eval_feed, False) - fetches = model.eval(test_feed_vars) - eval_prog = eval_prog.clone(True) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - test_data_feed = fluid.DataFeeder(test_feed_vars.values(), place) - - # parse eval fetches - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, - extra_keys) - - eval_fetch_list = [] - for k, v in zip(eval_keys, eval_values): - eval_fetch_list.append((k, v)) - print("eval_fetch_list: {}".format(eval_fetch_list)) - - exe.run(startup_prog) - checkpoint.load_params(exe, - fluid.default_main_program(), cfg.pretrain_weights) - - best_box_ap_list = [] - - def eval_func(program, scope): - results = eval_run(exe, program, eval_reader, eval_keys, eval_values, - eval_cls, test_data_feed) - - resolution = None - is_bbox_normalized = False - if 'mask' in results[0]: - resolution = model.mask_head.resolution - box_ap_stats = eval_results(results, eval_feed, cfg.metric, - cfg.num_classes, resolution, - is_bbox_normalized, FLAGS.output_eval) - if len(best_box_ap_list) == 0: - best_box_ap_list.append(box_ap_stats[0]) - elif box_ap_stats[0] > best_box_ap_list[0]: - best_box_ap_list[0] = box_ap_stats[0] - logger.info("Best test box ap: {}".format(best_box_ap_list[0])) - return best_box_ap_list[0] - - test_feed = [('image', test_feed_vars['image'].name), - ('im_size', test_feed_vars['im_size'].name)] - - teacher_cfg = load_config(FLAGS.teacher_config) - teacher_arch = teacher_cfg.architecture - teacher_programs = [] - teacher_program = fluid.Program() - teacher_startup_program = fluid.Program() - with fluid.program_guard(teacher_program, 
teacher_startup_program): - with fluid.unique_name.guard('teacher_'): - teacher_feed_vars = OrderedDict() - for name, var in train_feed_vars.items(): - teacher_feed_vars[name] = teacher_program.global_block( - )._clone_variable( - var, force_persistable=False) - model = create(teacher_arch) - train_fetches = model.train(teacher_feed_vars) - #print("="*50+"teacher_model_params"+"="*50) - #for v in teacher_program.list_vars(): - # print(v.name, v.shape) - #return - - exe.run(teacher_startup_program) - assert FLAGS.teacher_pretrained and os.path.exists( - FLAGS.teacher_pretrained - ), "teacher_pretrained should be set when teacher_model is not None." - - def if_exist(var): - return os.path.exists(os.path.join(FLAGS.teacher_pretrained, var.name)) - - fluid.io.load_vars( - exe, - FLAGS.teacher_pretrained, - main_program=teacher_program, - predicate=if_exist) - - teacher_programs.append(teacher_program.clone(for_test=True)) - - com = Compressor( - place, - fluid.global_scope(), - fluid.default_main_program(), - train_reader=train_reader, - train_feed_list=[(key, value.name) - for key, value in train_feed_vars.items()], - train_fetch_list=train_fetch_list, - eval_program=eval_prog, - eval_reader=eval_reader, - eval_feed_list=test_feed, - eval_func={'map': eval_func}, - eval_fetch_list=eval_fetch_list[0:1], - save_eval_model=True, - prune_infer_model=[["image", "im_size"], ["multiclass_nms_0.tmp_0"]], - teacher_programs=teacher_programs, - train_optimizer=None, - distiller_optimizer=opt, - log_period=20) - com.config(FLAGS.slim_file) - com.run() - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-t", - "--teacher_config", - default=None, - type=str, - help="Config file of teacher architecture.") - parser.add_argument( - "-s", - "--slim_file", - default=None, - type=str, - help="Config file of PaddleSlim.") - parser.add_argument( - "-r", - "--resume_checkpoint", - default=None, - type=str, - help="Checkpoint path for resuming training.") - 
parser.add_argument( - "--eval", - action='store_true', - default=False, - help="Whether to perform evaluation in train") - parser.add_argument( - "--teacher_pretrained", - default=None, - type=str, - help="Whether to use pretrained model.") - parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/distillation/run.sh b/PaddleCV/PaddleDetection/slim/distillation/run.sh deleted file mode 100644 index a5497bdce2464c72e14fa2168b87db60685e83e8..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/distillation/run.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash - -# download pretrain model -root_url="https://paddlemodels.bj.bcebos.com/object_detection" -yolov3_r34_voc="yolov3_r34_voc.tar" -pretrain_dir='./pretrain' - -if [ ! -d ${pretrain_dir} ]; then - mkdir ${pretrain_dir} -fi - -cd ${pretrain_dir} - -if [ ! 
-f ${yolov3_r34_voc} ]; then - wget ${root_url}/${yolov3_r34_voc} - tar xf ${yolov3_r34_voc} -fi -cd - - -# enable GC strategy -export FLAGS_fast_eager_deletion_mode=1 -export FLAGS_eager_delete_tensor_gb=0.0 - -# for distillation -#----------------- -export CUDA_VISIBLE_DEVICES=0,1,2,3 - - -# Fixing name conflicts in distillation -cd ${pretrain_dir}/yolov3_r34_voc -for files in $(ls teacher_*) - do mv $files ${files#*_} -done -for files in $(ls *) - do mv $files "teacher_"$files -done -cd - - -python -u compress.py \ --c ../../configs/yolov3_mobilenet_v1_voc.yml \ --t yolov3_resnet34.yml \ --s yolov3_mobilenet_v1_yolov3_resnet34_distillation.yml \ --o YoloTrainFeed.batch_size=64 \ --d ../../dataset/voc \ ---teacher_pretrained ./pretrain/yolov3_r34_voc \ -> yolov3_distallation.log 2>&1 & -tailf yolov3_distallation.log diff --git a/PaddleCV/PaddleDetection/slim/distillation/yolov3_mobilenet_v1_yolov3_resnet34_distillation.yml b/PaddleCV/PaddleDetection/slim/distillation/yolov3_mobilenet_v1_yolov3_resnet34_distillation.yml deleted file mode 100644 index 6a2a5a2575b71563c557c528a4dc94f00dce73ca..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/distillation/yolov3_mobilenet_v1_yolov3_resnet34_distillation.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: 1.0 -distillers: - l2_distiller: - class: 'L2Distiller' - teacher_feature_map: 'teacher_teacher_conv2d_1.tmp_0' - student_feature_map: 'conv2d_15.tmp_0' - distillation_loss_weight: 1 -strategies: - distillation_strategy: - class: 'DistillationStrategy' - distillers: ['l2_distiller'] - start_epoch: 0 - end_epoch: 270 -compressor: - epoch: 271 - checkpoint_path: './checkpoints/' - strategies: - - distillation_strategy diff --git a/PaddleCV/PaddleDetection/slim/distillation/yolov3_resnet34.yml b/PaddleCV/PaddleDetection/slim/distillation/yolov3_resnet34.yml deleted file mode 100644 index c04bdde9f6e35a1ce53231e3862a37364ff7dbb8..0000000000000000000000000000000000000000 --- 
a/PaddleCV/PaddleDetection/slim/distillation/yolov3_resnet34.yml +++ /dev/null @@ -1,34 +0,0 @@ -architecture: YOLOv3 -log_smooth_window: 20 -metric: VOC -map_type: 11point -num_classes: 20 -weight_prefix_name: teacher_ - -YOLOv3: - backbone: ResNet - yolo_head: YOLOv3Head - -ResNet: - norm_type: sync_bn - freeze_at: 0 - freeze_norm: false - norm_decay: 0. - depth: 34 - feature_maps: [3, 4, 5] - -YOLOv3Head: - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - norm_decay: 0. - ignore_thresh: 0.7 - label_smooth: false - nms: - background_label: -1 - keep_top_k: 100 - nms_threshold: 0.45 - nms_top_k: 1000 - normalized: false - score_threshold: 0.01 diff --git a/PaddleCV/PaddleDetection/slim/eval.py b/PaddleCV/PaddleDetection/slim/eval.py deleted file mode 100644 index 579f58d2932e215362efa9e8864c16d1a451b26a..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/eval.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np -import datetime -from collections import deque -import sys -sys.path.append("../../") -from paddle.fluid.contrib.slim import Compressor -from paddle.fluid.framework import IrGraph -from paddle.fluid import core -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass -from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass -from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass -from paddle.fluid.contrib.slim.quantization import TransformForMobilePass - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader - -from ppdet.utils.eval_utils import parse_fetches, eval_results -from ppdet.utils.stats import TrainingStats -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def eval_run(exe, compile_program, reader, keys, values, cls, test_feed): - """ - Run evaluation program, return program outputs. 
- """ - iter_id = 0 - results = [] - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - for data in reader(): - data = test_feed.feed(data) - feed_data = {'image': data['image'], 'im_size': data['im_size']} - outs = exe.run(compile_program, - feed=feed_data, - fetch_list=values[0], - return_numpy=False) - outs.append(data['gt_box']) - outs.append(data['gt_label']) - outs.append(data['is_difficult']) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. - format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def main(): - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - _, test_feed_vars = create_feed(eval_feed, False) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - #eval_pyreader.decorate_sample_list_generator(eval_reader, place) - test_data_feed = 
fluid.DataFeeder(test_feed_vars.values(), place) - - assert os.path.exists(FLAGS.model_path) - infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model( - dirname=FLAGS.model_path, - executor=exe, - model_filename=FLAGS.model_name, - params_filename=FLAGS.params_name) - - eval_keys = ['bbox', 'gt_box', 'gt_label', 'is_difficult'] - eval_values = [ - 'multiclass_nms_0.tmp_0', 'gt_box', 'gt_label', 'is_difficult' - ] - eval_cls = [] - eval_values[0] = fetch_targets[0] - - results = eval_run(exe, infer_prog, eval_reader, eval_keys, eval_values, - eval_cls, test_data_feed) - - resolution = None - if 'mask' in results[0]: - resolution = model.mask_head.resolution - eval_results(results, eval_feed, cfg.metric, cfg.num_classes, resolution, - False, FLAGS.output_eval) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-m", "--model_path", default=None, type=str, help="path of checkpoint") - parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - parser.add_argument( - "--model_name", - default='model', - type=str, - help="model file name to load_inference_model") - parser.add_argument( - "--params_name", - default='params', - type=str, - help="params file name to load_inference_model") - - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/infer.py b/PaddleCV/PaddleDetection/slim/infer.py deleted file mode 100644 index a5c00de983a15c40db60deb9097c3d8f92a557aa..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/infer.py +++ /dev/null @@ -1,300 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import glob -import time - -import numpy as np -from PIL import Image -sys.path.append("../../") - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppdet.utils.cli import print_total_cfg -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.modeling.model_input import create_feed -from ppdet.data.data_feed import create_reader - -from ppdet.utils.eval_utils import parse_fetches -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -from ppdet.utils.visualizer import visualize_results -import ppdet.utils.checkpoint as checkpoint - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def get_save_image_name(output_dir, image_path): - """ - Get save image name from source image path. 
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - return os.path.join(output_dir, "{}".format(name)) + ext - - -def get_test_images(infer_dir, infer_img): - """ - Get image path list in TEST mode - """ - assert infer_img is not None or infer_dir is not None, \ - "--infer_img or --infer_dir should be set" - assert infer_img is None or os.path.isfile(infer_img), \ - "{} is not a file".format(infer_img) - assert infer_dir is None or os.path.isdir(infer_dir), \ - "{} is not a directory".format(infer_dir) - images = [] - - # infer_img has a higher priority - if infer_img and os.path.isfile(infer_img): - images.append(infer_img) - return images - - infer_dir = os.path.abspath(infer_dir) - assert os.path.isdir(infer_dir), \ - "infer_dir {} is not a directory".format(infer_dir) - exts = ['jpg', 'jpeg', 'png', 'bmp'] - exts += [ext.upper() for ext in exts] - for ext in exts: - images.extend(glob.glob('{}/*.{}'.format(infer_dir, ext))) - - assert len(images) > 0, "no image found in {}".format(infer_dir) - logger.info("Found {} inference images in total.".format(len(images))) - - return images - - -def main(): - cfg = load_config(FLAGS.config) - - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - # print_total_cfg(cfg) - - if 'test_feed' not in cfg: - test_feed = create(main_arch + 'TestFeed') - else: - test_feed = create(cfg.test_feed) - - test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img) - test_feed.dataset.add_images(test_images) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - infer_prog, feed_var_names, fetch_list = fluid.io.load_inference_model( - dirname=FLAGS.model_path, - 
model_filename=FLAGS.model_name, - params_filename=FLAGS.params_name, - executor=exe) - - reader = create_reader(test_feed) - feeder = fluid.DataFeeder( - place=place, feed_list=feed_var_names, program=infer_prog) - - # parse infer fetches - assert cfg.metric in ['COCO', 'VOC'], \ - "unknown metric type {}".format(cfg.metric) - extra_keys = [] - if cfg['metric'] == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg['metric'] == 'VOC': - extra_keys = ['im_id', 'im_shape'] - keys, values, _ = parse_fetches({ - 'bbox': fetch_list - }, infer_prog, extra_keys) - - # parse dataset category - if cfg.metric == 'COCO': - from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info - if cfg.metric == "VOC": - from ppdet.utils.voc_eval import bbox2out, get_category_info - - anno_file = getattr(test_feed.dataset, 'annotation', None) - with_background = getattr(test_feed, 'with_background', True) - use_default_label = getattr(test_feed, 'use_default_label', False) - clsid2catid, catid2name = get_category_info(anno_file, with_background, - use_default_label) - - # whether output bbox is normalized in model output layer - is_bbox_normalized = False - - # use tb-paddle to log image - if FLAGS.use_tb: - from tb_paddle import SummaryWriter - tb_writer = SummaryWriter(FLAGS.tb_log_dir) - tb_image_step = 0 - tb_image_frame = 0 # each frame can display ten pictures at most. 
- - imid2path = reader.imid2path - keys = ['bbox'] - infer_time = True - compile_prog = fluid.compiler.CompiledProgram(infer_prog) - - for iter_id, data in enumerate(reader()): - feed_data = [[d[0], d[1]] for d in data] - # for infer time - if infer_time: - warmup_times = 10 - repeats_time = 100 - feed_data_dict = feeder.feed(feed_data) - for i in range(warmup_times): - exe.run(compile_prog, - feed=feed_data_dict, - fetch_list=fetch_list, - return_numpy=False) - start_time = time.time() - for i in range(repeats_time): - exe.run(compile_prog, - feed=feed_data_dict, - fetch_list=fetch_list, - return_numpy=False) - - print("infer time: {} ms/sample".format((time.time() - start_time) * - 1000 / repeats_time)) - infer_time = False - - outs = exe.run(compile_prog, - feed=feeder.feed(feed_data), - fetch_list=fetch_list, - return_numpy=False) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - res['im_id'] = [[d[2] for d in data]] - logger.info('Infer iter {}'.format(iter_id)) - - bbox_results = None - mask_results = None - if 'bbox' in res: - bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) - if 'mask' in res: - mask_results = mask2out([res], clsid2catid, - model.mask_head.resolution) - - # visualize result - im_ids = res['im_id'][0] - for im_id in im_ids: - image_path = imid2path[int(im_id)] - image = Image.open(image_path).convert('RGB') - - # use tb-paddle to log original image - if FLAGS.use_tb: - original_image_np = np.array(image) - tb_writer.add_image( - "original/frame_{}".format(tb_image_frame), - original_image_np, - tb_image_step, - dataformats='HWC') - - image = visualize_results(image, - int(im_id), catid2name, - FLAGS.draw_threshold, bbox_results, - mask_results) - - # use tb-paddle to log image with bbox - if FLAGS.use_tb: - infer_image_np = np.array(image) - tb_writer.add_image( - "bbox/frame_{}".format(tb_image_frame), - infer_image_np, - tb_image_step, - dataformats='HWC') - tb_image_step += 1 - 
if tb_image_step % 10 == 0: - tb_image_step = 0 - tb_image_frame += 1 - - save_name = get_save_image_name(FLAGS.output_dir, image_path) - logger.info("Detection bbox results save in {}".format(save_name)) - image.save(save_name, quality=95) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "--infer_dir", - type=str, - default=None, - help="Directory for images to perform inference on.") - parser.add_argument( - "--infer_img", - type=str, - default=None, - help="Image path, has higher priority over --infer_dir") - parser.add_argument( - "--output_dir", - type=str, - default="output", - help="Directory for storing the output visualization files.") - parser.add_argument( - "--draw_threshold", - type=float, - default=0.5, - help="Threshold to reserve the result for visualization.") - parser.add_argument( - "--use_tb", - type=bool, - default=False, - help="whether to record the data to Tensorboard.") - parser.add_argument( - '--tb_log_dir', - type=str, - default="tb_log_dir/image", - help='Tensorboard logging directory for image.') - parser.add_argument( - '--model_path', type=str, default=None, help="inference model path") - parser.add_argument( - '--model_name', - type=str, - default='__model__.infer', - help="model filename for inference model") - parser.add_argument( - '--params_name', - type=str, - default='__params__', - help="params filename for inference model") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/prune/README.md b/PaddleCV/PaddleDetection/slim/prune/README.md deleted file mode 100644 index b06fdd2bdd6a3cd75eb00ab7952dfd546b2bfaad..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/prune/README.md +++ /dev/null @@ -1,221 +0,0 @@ ->运行该示例前请安装Paddle1.6或更高版本 - -# 检测模型卷积通道剪裁示例 - -## 概述 - 
-该示例使用PaddleSlim提供的[卷积通道剪裁压缩策略](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/tutorial.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86)对检测库中的模型进行压缩。 -在阅读该示例前,建议您先了解以下内容: - -- 检测库的常规训练方法 -- [检测模型数据准备](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/docs/INSTALL_cn.md#%E6%95%B0%E6%8D%AE%E9%9B%86) -- [PaddleSlim使用文档](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) - - -## 配置文件说明 - -关于配置文件如何编写您可以参考: - -- [PaddleSlim配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#122-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E7%9A%84%E4%BD%BF%E7%94%A8) -- [裁剪策略配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#22-%E6%A8%A1%E5%9E%8B%E9%80%9A%E9%81%93%E5%89%AA%E8%A3%81) - -其中,配置文件中的`pruned_params`需要根据当前模型的网络结构特点设置,它用来指定要裁剪的parameters. - -这里以MobileNetV1-YoloV3模型为例,其卷积可以三种:主干网络中的普通卷积,主干网络中的`depthwise convolution`和`yolo block`里的普通卷积。PaddleSlim暂时无法对`depthwise convolution`直接进行剪裁, 因为`depthwise convolution`的`channel`的变化会同时影响到前后的卷积层。我们这里只对主干网络中的普通卷积和`yolo block`里的普通卷积做裁剪。 - -通过以下方式可视化模型结构: - -``` -from paddle.fluid.framework import IrGraph -from paddle.fluid import core - -graph = IrGraph(core.Graph(train_prog.desc), for_test=True) -marked_nodes = set() -for op in graph.all_op_nodes(): - print(op.name()) - if op.name().find('conv') > -1: - marked_nodes.add(op) -graph.draw('.', 'forward', marked_nodes) -``` - -该示例中MobileNetV1-YoloV3模型结构的可视化结果:MobileNetV1-YoloV3.pdf - -同时通过以下命令观察目标卷积层的参数(parameters)的名称和shape: - -``` -for param in fluid.default_main_program().global_block().all_parameters(): - if 'weights' in param.name: - print(param.name, param.shape) -``` - - -从可视化结果,我们可以排除后续会做concat的卷积层,最终得到如下要裁剪的参数名称: - -``` -conv2_1_sep_weights -conv2_2_sep_weights -conv3_1_sep_weights -conv4_1_sep_weights -conv5_1_sep_weights -conv5_2_sep_weights -conv5_3_sep_weights -conv5_4_sep_weights -conv5_5_sep_weights -conv5_6_sep_weights 
-yolo_block.0.0.0.conv.weights -yolo_block.0.0.1.conv.weights -yolo_block.0.1.0.conv.weights -yolo_block.0.1.1.conv.weights -yolo_block.1.0.0.conv.weights -yolo_block.1.0.1.conv.weights -yolo_block.1.1.0.conv.weights -yolo_block.1.1.1.conv.weights -yolo_block.1.2.conv.weights -yolo_block.2.0.0.conv.weights -yolo_block.2.0.1.conv.weights -yolo_block.2.1.1.conv.weights -yolo_block.2.2.conv.weights -yolo_block.2.tip.conv.weights -``` - -``` -(conv2_1_sep_weights)|(conv2_2_sep_weights)|(conv3_1_sep_weights)|(conv4_1_sep_weights)|(conv5_1_sep_weights)|(conv5_2_sep_weights)|(conv5_3_sep_weights)|(conv5_4_sep_weights)|(conv5_5_sep_weights)|(conv5_6_sep_weights)|(yolo_block.0.0.0.conv.weights)|(yolo_block.0.0.1.conv.weights)|(yolo_block.0.1.0.conv.weights)|(yolo_block.0.1.1.conv.weights)|(yolo_block.1.0.0.conv.weights)|(yolo_block.1.0.1.conv.weights)|(yolo_block.1.1.0.conv.weights)|(yolo_block.1.1.1.conv.weights)|(yolo_block.1.2.conv.weights)|(yolo_block.2.0.0.conv.weights)|(yolo_block.2.0.1.conv.weights)|(yolo_block.2.1.1.conv.weights)|(yolo_block.2.2.conv.weights)|(yolo_block.2.tip.conv.weights) -``` - -综上,我们将MobileNetV1-YoloV3配置文件中的`pruned_params`设置为以下正则表达式: - -``` -(conv2_1_sep_weights)|(conv2_2_sep_weights)|(conv3_1_sep_weights)|(conv4_1_sep_weights)|(conv5_1_sep_weights)|(conv5_2_sep_weights)|(conv5_3_sep_weights)|(conv5_4_sep_weights)|(conv5_5_sep_weights)|(conv5_6_sep_weights)|(yolo_block.0.0.0.conv.weights)|(yolo_block.0.0.1.conv.weights)|(yolo_block.0.1.0.conv.weights)|(yolo_block.0.1.1.conv.weights)|(yolo_block.1.0.0.conv.weights)|(yolo_block.1.0.1.conv.weights)|(yolo_block.1.1.0.conv.weights)|(yolo_block.1.1.1.conv.weights)|(yolo_block.1.2.conv.weights)|(yolo_block.2.0.0.conv.weights)|(yolo_block.2.0.1.conv.weights)|(yolo_block.2.1.1.conv.weights)|(yolo_block.2.2.conv.weights)|(yolo_block.2.tip.conv.weights) -``` - -我们可以用上述操作观察其它检测模型的参数名称规律,然后设置合适的正则表达式来剪裁合适的参数。 - -## 训练 - -根据PaddleDetection/tools/train.py编写压缩脚本compress.py。 -在该脚本中定义了Compressor对象,用于执行压缩任务。 - -### 执行示例 
- -step1: 设置gpu卡 -``` -export CUDA_VISIBLE_DEVICES=0 -``` -step2: 开始训练 - -使用PaddleDetection提供的配置文件用8卡进行训练: - -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -o max_iters=258 \ - YoloTrainFeed.batch_size=64 \ - -d "../../dataset/voc" -``` - ->通过命令行覆盖设置max_iters选项,因为PaddleDetection中训练是以`batch`为单位迭代的,并没有涉及`epoch`的概念,但是PaddleSlim需要知道当前训练进行到第几个`epoch`, 所以需要将`max_iters`设置为一个`epoch`内的`batch`的数量。 - -如果要调整训练卡数,需要调整配置文件`yolov3_mobilenet_v1_voc.yml`中的以下参数: - -- **max_iters:** 一个`epoch`中batch的数量,需要设置为`total_num / batch_size`, 其中`total_num`为训练样本总数量,`batch_size`为多卡上总的batch size. -- **YoloTrainFeed.batch_size:** 当使用DataLoader时,表示单张卡上的batch size; 当使用普通reader时,则表示多卡上的总的`batch_size`。`batch_size`受限于显存大小。 -- **LearningRate.base_lr:** 根据多卡的总`batch_size`调整`base_lr`,两者大小正相关,可以简单的按比例进行调整。 -- **LearningRate.schedulers.PiecewiseDecay.milestones:** 请根据batch size的变化对其调整。 -- **LearningRate.schedulers.PiecewiseDecay.LinearWarmup.steps:** 请根据batch size的变化对其进行调整。 - - -以下为4卡训练示例,通过命令行覆盖`yolov3_mobilenet_v1_voc.yml`中的参数: - -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -o max_iters=258 \ - YoloTrainFeed.batch_size=64 \ - -d "../../dataset/voc" -``` - -以下为2卡训练示例,受显存所制,单卡`batch_size`不变,总`batch_size`减小,`base_lr`减小,一个epoch内batch数量增加,同时需要调整学习率相关参数,如下: -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -o max_iters=516 \ - LearningRate.base_lr=0.005 \ - YoloTrainFeed.batch_size=32 \ - LearningRate.schedulers='[!PiecewiseDecay {gamma: 0.1, milestones: [110000, 124000]}, !LinearWarmup {start_factor: 0., steps: 2000}]' \ - -d "../../dataset/voc" -``` - -通过`python compress.py --help`查看可配置参数。 -通过`python ../../tools/configure.py ${option_name} help`查看如何通过命令行覆盖配置文件`yolov3_mobilenet_v1_voc.yml`中的参数。 - -### 保存断点(checkpoint) - -如果在配置文件中设置了`checkpoint_path`, 则在压缩任务执行过程中会自动保存断点,当任务异常中断时, 
-重启任务会自动从`checkpoint_path`路径下按数字顺序加载最新的checkpoint文件。如果不想让重启的任务从断点恢复, -需要修改配置文件中的`checkpoint_path`,或者将`checkpoint_path`路径下文件清空。 - ->注意:配置文件中的信息不会保存在断点中,重启前对配置文件的修改将会生效。 - - -## 评估 - -如果在配置文件中设置了`checkpoint_path`,则每个epoch会保存一个压缩后的用于评估的模型, -该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__`和`__params__`两个文件。 -其中,`__model__`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。 - -如果不需要保存评估模型,可以在定义Compressor对象时,将`save_eval_model`选项设置为False(默认为True)。 - -运行命令为: -``` -python ../eval.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --model_name __model__ \ - --params_name __params__ \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" -``` - -## 预测 - -如果在配置文件中设置了`checkpoint_path`,并且在定义Compressor对象时指定了`prune_infer_model`选项,则每个epoch都会 -保存一个`inference model`。该模型是通过删除eval_program中多余的operators而得到的。 - -该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__.infer`和`__params__`两个文件。 -其中,`__model__.infer`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。 - -更多关于`prune_infer_model`选项的介绍,请参考:[Compressor介绍](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#121-%E5%A6%82%E4%BD%95%E6%94%B9%E5%86%99%E6%99%AE%E9%80%9A%E8%AE%AD%E7%BB%83%E8%84%9A%E6%9C%AC) - -### python预测 - -在脚本PaddleDetection/tools/infer.py中展示了如何使用fluid python API加载使用预测模型进行预测。 - -运行命令为: -``` -python ../infer.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --model_name __model__.infer \ - --params_name __params__ \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - --infer_dir ../../demo -``` - -### PaddleLite - -该示例中产出的预测(inference)模型可以直接用PaddleLite进行加载使用。 -关于PaddleLite如何使用,请参考:[PaddleLite使用文档](https://github.com/PaddlePaddle/Paddle-Lite/wiki#%E4%BD%BF%E7%94%A8) - -## 示例结果 - -> 当前release的结果并非超参调优后的最好结果,仅做示例参考,后续我们会优化当前结果。 - -### MobileNetV1-YOLO-V3 - -| FLOPS |Box AP| model_size |Paddle Fluid inference time(ms)| Paddle Lite inference time(ms)| -|---|---|---|---|---| -|baseline|76.2 |93M |- |-| -|-50%|69.48 |51M |- |-| - 
-## FAQ diff --git a/PaddleCV/PaddleDetection/slim/prune/compress.py b/PaddleCV/PaddleDetection/slim/prune/compress.py deleted file mode 100644 index 66f9a0cb50514bfbdf2964024064caf6702cfbc4..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/prune/compress.py +++ /dev/null @@ -1,255 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np -import sys -sys.path.append("../../") -from paddle.fluid.contrib.slim import Compressor - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. 
-set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader -from ppdet.utils.eval_utils import parse_fetches, eval_results -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def eval_run(exe, compile_program, reader, keys, values, cls, test_feed): - """ - Run evaluation program, return program outputs. - """ - iter_id = 0 - results = [] - if len(cls) != 0: - values = [] - for i in range(len(cls)): - _, accum_map = cls[i].get_map_var() - cls[i].reset(exe) - values.append(accum_map) - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - for data in reader(): - data = test_feed.feed(data) - feed_data = {'image': data['image'], 'im_size': data['im_size']} - outs = exe.run(compile_program, - feed=feed_data, - fetch_list=[values[0]], - return_numpy=False) - outs.append(data['gt_box']) - outs.append(data['gt_label']) - outs.append(data['is_difficult']) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. 
- format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def main(): - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - if 'train_feed' not in cfg: - train_feed = create(main_arch + 'TrainFeed') - else: - train_feed = create(cfg.train_feed) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - lr_builder = create('LearningRate') - optim_builder = create('OptimizerBuilder') - - # build program - startup_prog = fluid.Program() - train_prog = fluid.Program() - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - _, feed_vars = create_feed(train_feed, False) - train_fetches = model.train(feed_vars) - loss = train_fetches['loss'] - lr = lr_builder() - optimizer = optim_builder(lr) - optimizer.minimize(loss) - - train_reader = create_reader(train_feed, cfg.max_iters, FLAGS.dataset_dir) - - # parse train fetches - train_keys, train_values, _ = parse_fetches(train_fetches) - train_keys.append("lr") - train_values.append(lr.name) - - train_fetch_list = [] - for k, v in zip(train_keys, train_values): - train_fetch_list.append((k, v)) - - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - _, test_feed_vars = create_feed(eval_feed, 
False) - fetches = model.eval(test_feed_vars) - - eval_prog = eval_prog.clone(True) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - test_data_feed = fluid.DataFeeder(test_feed_vars.values(), place) - - # parse eval fetches - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, - extra_keys) - eval_fetch_list = [] - for k, v in zip(eval_keys, eval_values): - eval_fetch_list.append((k, v)) - - exe.run(startup_prog) - checkpoint.load_params(exe, train_prog, cfg.pretrain_weights) - - best_box_ap_list = [] - - def eval_func(program, scope): - - #place = fluid.CPUPlace() - #exe = fluid.Executor(place) - results = eval_run(exe, program, eval_reader, eval_keys, eval_values, - eval_cls, test_data_feed) - - resolution = None - if 'mask' in results[0]: - resolution = model.mask_head.resolution - box_ap_stats = eval_results(results, eval_feed, cfg.metric, - cfg.num_classes, resolution, False, - FLAGS.output_eval) - if len(best_box_ap_list) == 0: - best_box_ap_list.append(box_ap_stats[0]) - elif box_ap_stats[0] > best_box_ap_list[0]: - best_box_ap_list[0] = box_ap_stats[0] - logger.info("Best test box ap: {}".format(best_box_ap_list[0])) - return best_box_ap_list[0] - - test_feed = [('image', test_feed_vars['image'].name), - ('im_size', test_feed_vars['im_size'].name)] - - com = Compressor( - place, - fluid.global_scope(), - train_prog, - train_reader=train_reader, - train_feed_list=[(key, value.name) for key, value in feed_vars.items()], - train_fetch_list=train_fetch_list, - eval_program=eval_prog, - eval_reader=eval_reader, - eval_feed_list=test_feed, - eval_func={'map': eval_func}, - eval_fetch_list=[eval_fetch_list[0]], - save_eval_model=True, - prune_infer_model=[["image", "im_size"], ["multiclass_nms_0.tmp_0"]], - train_optimizer=None) - 
com.config(FLAGS.slim_file) - com.run() - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-s", - "--slim_file", - default=None, - type=str, - help="Config file of PaddleSlim.") - parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/prune/images/MobileNetV1-YoloV3.pdf b/PaddleCV/PaddleDetection/slim/prune/images/MobileNetV1-YoloV3.pdf deleted file mode 100644 index f5d3a22db5030ffc6beb1e7f8c92bddd61e366e6..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/slim/prune/images/MobileNetV1-YoloV3.pdf and /dev/null differ diff --git a/PaddleCV/PaddleDetection/slim/prune/yolov3_mobilenet_v1_slim.yaml b/PaddleCV/PaddleDetection/slim/prune/yolov3_mobilenet_v1_slim.yaml deleted file mode 100644 index ff9859eafe5872dee13f33a3703502e334d1c85e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/prune/yolov3_mobilenet_v1_slim.yaml +++ /dev/null @@ -1,23 +0,0 @@ -version: 1.0 -pruners: - pruner_1: - class: 'StructurePruner' - pruning_axis: - '*': 0 - criterions: - '*': 'l1_norm' -strategies: - uniform_pruning_strategy: - class: 'UniformPruneStrategy' - pruner: 'pruner_1' - start_epoch: 0 - target_ratio: 0.5 - pruned_params: 
'(conv2_1_sep_weights)|(conv2_2_sep_weights)|(conv3_1_sep_weights)|(conv4_1_sep_weights)|(conv5_1_sep_weights)|(conv5_2_sep_weights)|(conv5_3_sep_weights)|(conv5_4_sep_weights)|(conv5_5_sep_weights)|(conv5_6_sep_weights)|(yolo_block.0.0.0.conv.weights)|(yolo_block.0.0.1.conv.weights)|(yolo_block.0.1.0.conv.weights)|(yolo_block.0.1.1.conv.weights)|(yolo_block.1.0.0.conv.weights)|(yolo_block.1.0.1.conv.weights)|(yolo_block.1.1.0.conv.weights)|(yolo_block.1.1.1.conv.weights)|(yolo_block.1.2.conv.weights)|(yolo_block.2.0.0.conv.weights)|(yolo_block.2.0.1.conv.weights)|(yolo_block.2.1.1.conv.weights)|(yolo_block.2.2.conv.weights)|(yolo_block.2.tip.conv.weights)' - metric_name: 'acc_top1' -compressor: - epoch: 271 - eval_epoch: 10 - #init_model: './checkpoints/0' # Please enable this option for loading checkpoint. - checkpoint_path: './checkpoints/' - strategies: - - uniform_pruning_strategy diff --git a/PaddleCV/PaddleDetection/slim/quantization/README.md b/PaddleCV/PaddleDetection/slim/quantization/README.md deleted file mode 100644 index d451e959a8828c24fcafb9ac52b8c5a2a3ce8de5..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/quantization/README.md +++ /dev/null @@ -1,245 +0,0 @@ ->运行该示例前请安装Paddle1.6或更高版本 - -# 检测模型量化压缩示例 - -## 概述 - -该示例使用PaddleSlim提供的[量化压缩策略](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/tutorial.md#1-quantization-aware-training%E9%87%8F%E5%8C%96%E4%BB%8B%E7%BB%8D)对检测模型进行压缩。 -在阅读该示例前,建议您先了解以下内容: - -- [检测模型的常规训练方法](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection) -- [PaddleSlim使用文档](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) - - -## 配置文件说明 - -关于配置文件如何编写您可以参考: - -- [PaddleSlim配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#122-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E7%9A%84%E4%BD%BF%E7%94%A8) -- 
[量化策略配置文件编写说明](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#21-%E9%87%8F%E5%8C%96%E8%AE%AD%E7%BB%83) - -其中save_out_nodes需要得到检测结果的Variable的名称,下面介绍如何确定save_out_nodes的参数 -以MobileNet V1为例,可在compress.py中构建好网络之后,直接打印Variable得到Variable的名称信息。 -代码示例: -``` - eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, - extra_keys) - # print(eval_values) -``` -根据运行结果可看到Variable的名字为:`multiclass_nms_0.tmp_0`。 -## 训练 - -根据 [PaddleCV/PaddleDetection/tools/train.py](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/tools/train.py) 编写压缩脚本compress.py。 -在该脚本中定义了Compressor对象,用于执行压缩任务。 - -通过`python compress.py --help`查看可配置参数,简述如下: - -- config: 检测库的配置,其中配置了训练超参数、数据集信息等。 -- slim_file: PaddleSlim的配置文件,参见[配置文件说明](#配置文件说明)。 - -您可以通过运行以下命令运行该示例。 - -step1: 设置gpu卡 -``` -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -``` -step2: 开始训练 - -使用PaddleDetection提供的配置文件用8卡进行训练: - -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" \ - -o max_iters=258 \ - LearningRate.base_lr=0.0001 \ - LearningRate.schedulers="[!PiecewiseDecay {gamma: 0.1, milestones: [258, 516]}]" \ - pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar \ - YoloTrainFeed.batch_size=64 -``` - ->通过命令行覆盖设置max_iters选项,因为PaddleDetection中训练是以`batch`为单位迭代的,并没有涉及`epoch`的概念,但是PaddleSlim需要知道当前训练进行到第几个`epoch`, 所以需要将`max_iters`设置为一个`epoch`内的`batch`的数量。 - -如果要调整训练卡数,需要调整配置文件`yolov3_mobilenet_v1_voc.yml`中的以下参数: - -- **max_iters:** 一个`epoch`中batch的数量,需要设置为`total_num / batch_size`, 其中`total_num`为训练样本总数量,`batch_size`为多卡上总的batch size. 
-- **YoloTrainFeed.batch_size:** 当使用DataLoader时,表示单张卡上的batch size; 当使用普通reader时,则表示多卡上的总的batch_size。batch_size受限于显存大小。 -- **LearningRate.base_lr:** 根据多卡的总`batch_size`调整`base_lr`,两者大小正相关,可以简单的按比例进行调整。 -- **LearningRate.schedulers.PiecewiseDecay.milestones:** 请根据batch size的变化对其调整。 -- **LearningRate.schedulers.PiecewiseDecay.LinearWarmup.steps:** 请根据batch size的变化对其进行调整。 - - -以下为4卡训练示例,通过命令行覆盖`yolov3_mobilenet_v1_voc.yml`中的参数: - -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" \ - -o max_iters=258 \ - LearningRate.base_lr=0.0001 \ - LearningRate.schedulers="[!PiecewiseDecay {gamma: 0.1, milestones: [258, 516]}]" \ - pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar \ - YoloTrainFeed.batch_size=64 - -``` - -以下为2卡训练示例,受显存所制,单卡`batch_size`不变, 总`batch_size`减小,`base_lr`减小,一个epoch内batch数量增加,同时需要调整学习率相关参数,如下: - -``` -python compress.py \ - -s yolov3_mobilenet_v1_slim.yaml \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" \ - -o max_iters=516 \ - LearningRate.base_lr=0.00005 \ - LearningRate.schedulers="[!PiecewiseDecay {gamma: 0.1, milestones: [516, 1012]}]" \ - pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar \ - YoloTrainFeed.batch_size=32 -``` - -通过`python compress.py --help`查看可配置参数。 -通过`python ../../tools/configure.py ${option_name} help`查看如何通过命令行覆盖配置文件`yolov3_mobilenet_v1_voc.yml`中的参数。 - - - -### 训练时的模型结构 -这部分介绍来源于[量化low-level API介绍](https://github.com/PaddlePaddle/models/tree/develop/PaddleSlim/quant_low_level_api#1-%E9%87%8F%E5%8C%96%E8%AE%AD%E7%BB%83low-level-apis%E4%BB%8B%E7%BB%8D)。 - -PaddlePaddle框架中和量化相关的IrPass, 分别有QuantizationTransformPass、QuantizationFreezePass、ConvertToInt8Pass。在训练时,对网络应用了QuantizationTransformPass,作用是在网络中的conv2d、depthwise_conv2d、mul等算子的各个输入前插入连续的量化op和反量化op,并改变相应反向算子的某些输入。示例图如下: - -

-
-图1:应用QuantizationTransformPass后的结果 -

- -### 保存断点(checkpoint) - -如果在配置文件中设置了`checkpoint_path`, 则在压缩任务执行过程中会自动保存断点,当任务异常中断时, -重启任务会自动从`checkpoint_path`路径下按数字顺序加载最新的checkpoint文件。如果不想让重启的任务从断点恢复, -需要修改配置文件中的`checkpoint_path`,或者将`checkpoint_path`路径下文件清空。 - ->注意:配置文件中的信息不会保存在断点中,重启前对配置文件的修改将会生效。 - - -### 保存评估和预测模型 - -如果在配置文件的量化策略中设置了`float_model_save_path`, `int8_model_save_path` 在训练结束后,会保存模型量化压缩之后用于预测的模型。接下来介绍这2种预测模型的区别。 - -#### FP32模型 -在介绍量化训练时的模型结构时介绍了PaddlePaddle框架中和量化相关的IrPass, 分别是QuantizationTransformPass、QuantizationFreezePass、ConvertToInt8Pass。FP32模型是在应用QuantizationFreezePass并删除eval_program中多余的operators之后,保存的模型。 - -QuantizationFreezePass主要用于改变IrGraph中量化op和反量化op的顺序,即将类似图1中的量化op和反量化op顺序改变为图2中的布局。除此之外,QuantizationFreezePass还会将`conv2d`、`depthwise_conv2d`、`mul`等算子的权重离线量化为int8_t范围内的值(但数据类型仍为float32),以减少预测过程中对权重的量化操作,示例如图2: - -

-
-图2:应用QuantizationFreezePass后的结果 -

- -#### 8-bit模型 -在对训练网络进行QuantizationFreezePass之后,执行ConvertToInt8Pass, -其主要目的是将执行完QuantizationFreezePass后输出的权重类型由`FP32`更改为`INT8`。换言之,用户可以选择将量化后的权重保存为float32类型(不执行ConvertToInt8Pass)或者int8_t类型(执行ConvertToInt8Pass),示例如图3: - -

-
-图3:应用ConvertToInt8Pass后的结果 -

- -> 综上,可得在量化过程中有以下几种模型结构: - -1. 原始模型 -2. 经QuantizationTransformPass之后得到的适用于训练的量化模型结构,在${checkpoint_path}下保存的`eval_model`是这种结构,在训练过程中每个epoch结束时也使用这个网络结构进行评估,虽然这个模型结构不是最终想要的模型结构,但是每个epoch的评估结果可用来挑选模型。 -3. 经QuantizationFreezePass之后得到的FP32模型结构,具体结构已在上面进行介绍。本文档中列出的数据集的评估结果是对FP32模型结构进行评估得到的结果。这种模型结构在训练过程中只会保存一次,也就是在量化配置文件中设置的`end_epoch`结束时进行保存,如果想将其他epoch的训练结果转化成FP32模型,可使用脚本 slim/quantization/freeze.py进行转化,具体使用方法在[评估](#评估)中介绍。 -4. 经ConvertToInt8Pass之后得到的8-bit模型结构,具体结构已在上面进行介绍。这种模型结构在训练过程中只会保存一次,也就是在量化配置文件中设置的`end_epoch`结束时进行保存,如果想将其他epoch的训练结果转化成8-bit模型,可使用脚本 slim/quantization/freeze.py进行转化,具体使用方法在[评估](#评估)中介绍。 - - -## 评估 - -### 每个epoch保存的评估模型 -因为量化的最终模型只有在end_epoch时保存一次,不能保证保存的模型是最好的,因此 -如果在配置文件中设置了`checkpoint_path`,则每个epoch会保存一个量化后的用于评估的模型, -该模型会保存在`${checkpoint_path}/${epoch_id}/eval_model/`路径下,包含`__model__`和`__params__`两个文件。 -其中,`__model__`用于保存模型结构信息,`__params__`用于保存参数(parameters)信息。模型结构和训练时一样。 - -如果不需要保存评估模型,可以在定义Compressor对象时,将`save_eval_model`选项设置为False(默认为True)。 - -脚本slim/eval.py中为使用该模型在评估数据集上做评估的示例。 -运行命令为: -``` -python ../eval.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --model_name __model__ \ - --params_name __params__ \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" -``` - -在评估之后,选取效果最好的epoch的模型,可使用脚本 slim/quantization/freeze.py将该模型转化为以上介绍的2种模型:FP32模型,int8模型,需要配置的参数为: - -- model_path, 加载的模型路径,为`${checkpoint_path}/${epoch_id}/eval_model/` -- weight_quant_type 模型参数的量化方式,和配置文件中的类型保持一致 -- save_path `FP32`, `8-bit` 模型的保存路径,分别为 `${save_path}/float/`, `${save_path}/int8/` - -运行命令示例: -``` -python freeze.py \ - --model_path ${checkpoint_path}/${epoch_id}/eval_model/ \ - --weight_quant_type ${weight_quant_type} \ - --save_path ${any path you want} \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" -``` - -### 最终评估模型 -最终使用的评估模型是FP32模型,脚本slim/eval.py中为使用该模型在评估数据集上做评估的示例。 -运行命令为: -``` -python ../eval.py \ - --model_path ${float_model_path} \ - --model_name model \ - 
--params_name weights \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - -d "../../dataset/voc" -``` - -## 预测 - -### python预测 -FP32模型可直接使用原生PaddlePaddle Fluid预测方法进行预测。 - -在脚本slim/infer.py中展示了如何使用fluid python API加载使用预测模型进行预测。 - -运行命令示例: -``` -python ../infer.py \ - --model_path ${save_path}/float \ - --model_name model \ - --params_name weights \ - -c ../../configs/yolov3_mobilenet_v1_voc.yml \ - --infer_dir ../../demo -``` - - -### PaddleLite预测 -FP32模型可使用PaddleLite进行加载预测,可参见教程[Paddle-Lite如何加载运行量化模型](https://github.com/PaddlePaddle/Paddle-Lite/wiki/model_quantization) - - -## 示例结果 - ->当前release的结果并非超参调优后的最好结果,仅做示例参考,后续我们会优化当前结果。 - -### MobileNetV1-YOLO-V3 - -| weight量化方式 | activation量化方式| Box ap |Paddle Fluid inference time(ms)| Paddle Lite inference time(ms)| -|---|---|---|---|---| -|baseline|- |76.2%|- |-| -|abs_max|abs_max|- |- |-| -|abs_max|moving_average_abs_max|74.48%|10.99|3348.68| -|channel_wise_abs_max|abs_max|- |- |-| - -> 注: lite端运行手机信息:Android手机, -型号:BKL-AL20,运行内存RAM:4GB 6GB,CPU核心数:八核 4*A73 2.36GHz+4*A53 1.8GHz,操作系统:EMUI 8.0,CPU品牌:麒麟970 - - -## FAQ diff --git a/PaddleCV/PaddleDetection/slim/quantization/compress.py b/PaddleCV/PaddleDetection/slim/quantization/compress.py deleted file mode 100644 index 0e145abcf70c54a3b7960243e20c0cb8cb6d39d9..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/quantization/compress.py +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np -import datetime -from collections import deque -import sys -sys.path.append("../../") -from paddle.fluid.contrib.slim import Compressor -from paddle.fluid.framework import IrGraph -from paddle.fluid import core - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader - -from ppdet.utils.eval_utils import parse_fetches, eval_results -from ppdet.utils.stats import TrainingStats -from ppdet.utils.cli import ArgsParser, print_total_cfg -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def eval_run(exe, compile_program, reader, keys, values, cls, test_feed): - """ - Run evaluation program, return program outputs. 
- """ - iter_id = 0 - results = [] - if len(cls) != 0: - values = [] - for i in range(len(cls)): - _, accum_map = cls[i].get_map_var() - cls[i].reset(exe) - values.append(accum_map) - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - for data in reader(): - data = test_feed.feed(data) - feed_data = {'image': data['image'], 'im_size': data['im_size']} - outs = exe.run(compile_program, - feed=feed_data, - fetch_list=[values[0]], - return_numpy=False) - outs.append(data['gt_box']) - outs.append(data['gt_label']) - outs.append(data['is_difficult']) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. 
- format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def main(): - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - if 'train_feed' not in cfg: - train_feed = create(main_arch + 'TrainFeed') - else: - train_feed = create(cfg.train_feed) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - lr_builder = create('LearningRate') - optim_builder = create('OptimizerBuilder') - - # build program - startup_prog = fluid.Program() - train_prog = fluid.Program() - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - _, feed_vars = create_feed(train_feed, False) - train_fetches = model.train(feed_vars) - loss = train_fetches['loss'] - lr = lr_builder() - optimizer = optim_builder(lr) - optimizer.minimize(loss) - - train_reader = create_reader(train_feed, cfg.max_iters, FLAGS.dataset_dir) - - # parse train fetches - train_keys, train_values, _ = parse_fetches(train_fetches) - train_values.append(lr) - - train_fetch_list = [] - for k, v in zip(train_keys, train_values): - train_fetch_list.append((k, v)) - print("train_fetch_list: {}".format(train_fetch_list)) - - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - _, test_feed_vars 
= create_feed(eval_feed, False) - fetches = model.eval(test_feed_vars) - eval_prog = eval_prog.clone(True) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - #eval_pyreader.decorate_sample_list_generator(eval_reader, place) - test_data_feed = fluid.DataFeeder(test_feed_vars.values(), place) - - # parse eval fetches - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, - extra_keys) - # print(eval_values) - - eval_fetch_list = [] - for k, v in zip(eval_keys, eval_values): - eval_fetch_list.append((k, v)) - - exe.run(startup_prog) - - start_iter = 0 - - checkpoint.load_params(exe, train_prog, cfg.pretrain_weights) - - best_box_ap_list = [] - - def eval_func(program, scope): - - #place = fluid.CPUPlace() - #exe = fluid.Executor(place) - results = eval_run(exe, program, eval_reader, eval_keys, eval_values, - eval_cls, test_data_feed) - - resolution = None - if 'mask' in results[0]: - resolution = model.mask_head.resolution - box_ap_stats = eval_results(results, eval_feed, cfg.metric, - cfg.num_classes, resolution, False, - FLAGS.output_eval) - if len(best_box_ap_list) == 0: - best_box_ap_list.append(box_ap_stats[0]) - elif box_ap_stats[0] > best_box_ap_list[0]: - best_box_ap_list[0] = box_ap_stats[0] - logger.info("Best test box ap: {}".format(best_box_ap_list[0])) - return best_box_ap_list[0] - - test_feed = [('image', test_feed_vars['image'].name), - ('im_size', test_feed_vars['im_size'].name)] - - com = Compressor( - place, - fluid.global_scope(), - train_prog, - train_reader=train_reader, - train_feed_list=[(key, value.name) for key, value in feed_vars.items()], - train_fetch_list=train_fetch_list, - eval_program=eval_prog, - eval_reader=eval_reader, - eval_feed_list=test_feed, - eval_func={'map': eval_func}, - eval_fetch_list=[eval_fetch_list[0]], - 
prune_infer_model=[["image", "im_size"], ["multiclass_nms_0.tmp_0"]], - train_optimizer=None) - com.config(FLAGS.slim_file) - com.run() - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-s", - "--slim_file", - default=None, - type=str, - help="Config file of PaddleSlim.") - parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/quantization/freeze.py b/PaddleCV/PaddleDetection/slim/quantization/freeze.py deleted file mode 100644 index 42c7bc62fd771366430f3658d9446a0f12fe2125..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/quantization/freeze.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import multiprocessing -import numpy as np -import datetime -from collections import deque -import sys -sys.path.append("../../") -from paddle.fluid.contrib.slim import Compressor -from paddle.fluid.framework import IrGraph -from paddle.fluid import core -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass -from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass -from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass -from paddle.fluid.contrib.slim.quantization import TransformForMobilePass - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader - -from ppdet.utils.eval_utils import parse_fetches, eval_results -from ppdet.utils.stats import TrainingStats -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def eval_run(exe, compile_program, reader, keys, values, cls, test_feed): - """ - Run evaluation program, return program outputs. 
- """ - iter_id = 0 - results = [] - - images_num = 0 - start_time = time.time() - has_bbox = 'bbox' in keys - for data in reader(): - data = test_feed.feed(data) - feed_data = {'image': data['image'], 'im_size': data['im_size']} - outs = exe.run(compile_program, - feed=feed_data, - fetch_list=values[0], - return_numpy=False) - outs.append(data['gt_box']) - outs.append(data['gt_label']) - outs.append(data['is_difficult']) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - results.append(res) - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - iter_id += 1 - images_num += len(res['bbox'][1][0]) if has_bbox else 1 - logger.info('Test finish iter {}'.format(iter_id)) - - end_time = time.time() - fps = images_num / (end_time - start_time) - if has_bbox: - logger.info('Total number of images: {}, inference time: {} fps.'. - format(images_num, fps)) - else: - logger.info('Total iteration: {}, inference time: {} batch/s.'.format( - images_num, fps)) - - return results - - -def main(): - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int( - os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - _, test_feed_vars = create_feed(eval_feed, False) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - #eval_pyreader.decorate_sample_list_generator(eval_reader, place) - test_data_feed = 
fluid.DataFeeder(test_feed_vars.values(), place) - - assert os.path.exists(FLAGS.model_path) - infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model( - dirname=FLAGS.model_path, - executor=exe, - model_filename='__model__.infer', - params_filename='__params__') - - eval_keys = ['bbox', 'gt_box', 'gt_label', 'is_difficult'] - eval_values = [ - 'multiclass_nms_0.tmp_0', 'gt_box', 'gt_label', 'is_difficult' - ] - eval_cls = [] - eval_values[0] = fetch_targets[0] - - results = eval_run(exe, infer_prog, eval_reader, eval_keys, eval_values, - eval_cls, test_data_feed) - - resolution = None - if 'mask' in results[0]: - resolution = model.mask_head.resolution - box_ap_stats = eval_results(results, eval_feed, cfg.metric, cfg.num_classes, - resolution, False, FLAGS.output_eval) - - logger.info("freeze the graph for inference") - test_graph = IrGraph(core.Graph(infer_prog.desc), for_test=True) - - freeze_pass = QuantizationFreezePass( - scope=fluid.global_scope(), - place=place, - weight_quantize_type=FLAGS.weight_quant_type) - freeze_pass.apply(test_graph) - server_program = test_graph.to_program() - fluid.io.save_inference_model( - dirname=os.path.join(FLAGS.save_path, 'float'), - feeded_var_names=feed_names, - target_vars=fetch_targets, - executor=exe, - main_program=server_program, - model_filename='model', - params_filename='weights') - - logger.info("convert the weights into int8 type") - convert_int8_pass = ConvertToInt8Pass( - scope=fluid.global_scope(), place=place) - convert_int8_pass.apply(test_graph) - server_int8_program = test_graph.to_program() - fluid.io.save_inference_model( - dirname=os.path.join(FLAGS.save_path, 'int8'), - feeded_var_names=feed_names, - target_vars=fetch_targets, - executor=exe, - main_program=server_int8_program, - model_filename='model', - params_filename='weights') - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-m", "--model_path", default=None, type=str, help="path of checkpoint") - 
parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - parser.add_argument( - "--weight_quant_type", - default='abs_max', - type=str, - help="quantization type for weight") - parser.add_argument( - "--save_path", - default='./output', - type=str, - help="path to save quantization inference model") - - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/slim/quantization/images/ConvertToInt8Pass.png b/PaddleCV/PaddleDetection/slim/quantization/images/ConvertToInt8Pass.png deleted file mode 100644 index 8b5849819c0bc8e592dc8f864d8945330df85ab1..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/slim/quantization/images/ConvertToInt8Pass.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/slim/quantization/images/FreezePass.png b/PaddleCV/PaddleDetection/slim/quantization/images/FreezePass.png deleted file mode 100644 index acd2b0a890a8af85bec6eecdb22e47ad386a178c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/slim/quantization/images/FreezePass.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/slim/quantization/images/TransformForMobilePass.png b/PaddleCV/PaddleDetection/slim/quantization/images/TransformForMobilePass.png deleted file mode 100644 index 4104cacc67af0be1c7bc152696e2ae544127aace..0000000000000000000000000000000000000000 Binary files a/PaddleCV/PaddleDetection/slim/quantization/images/TransformForMobilePass.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/slim/quantization/images/TransformPass.png b/PaddleCV/PaddleDetection/slim/quantization/images/TransformPass.png deleted file mode 100644 index f29ab62753e0e6ddf28d0c1dda7139705fc24b18..0000000000000000000000000000000000000000 Binary files 
a/PaddleCV/PaddleDetection/slim/quantization/images/TransformPass.png and /dev/null differ diff --git a/PaddleCV/PaddleDetection/slim/quantization/yolov3_mobilenet_v1_slim.yaml b/PaddleCV/PaddleDetection/slim/quantization/yolov3_mobilenet_v1_slim.yaml deleted file mode 100644 index 9d453450d91edf4d10c6aa5fd9fd29f21953e5d3..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/slim/quantization/yolov3_mobilenet_v1_slim.yaml +++ /dev/null @@ -1,19 +0,0 @@ -version: 1.0 -strategies: - quantization_strategy: - class: 'QuantizationStrategy' - start_epoch: 0 - end_epoch: 4 - float_model_save_path: './output/yolov3/float' - int8_model_save_path: './output/yolov3/int8' - weight_bits: 8 - activation_bits: 8 - weight_quantize_type: 'abs_max' - activation_quantize_type: 'moving_average_abs_max' - save_in_nodes: ['image', 'im_size'] - save_out_nodes: ['multiclass_nms_0.tmp_0'] -compressor: - epoch: 5 - checkpoint_path: './checkpoints/yolov3/' - strategies: - - quantization_strategy diff --git a/PaddleCV/PaddleDetection/tools/__init__.py b/PaddleCV/PaddleDetection/tools/__init__.py deleted file mode 100644 index d0c32e26092f6ea25771279418582a24ea449ab2..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/PaddleCV/PaddleDetection/tools/configure.py b/PaddleCV/PaddleDetection/tools/configure.py deleted file mode 100644 index 560d161513ae8f0115d8d3d5f97f6a0695642015..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/configure.py +++ /dev/null @@ -1,202 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import sys -from argparse import ArgumentParser, RawDescriptionHelpFormatter - -import yaml - -from ppdet.core.workspace import get_registered_modules, load_config, dump_value -from ppdet.utils.cli import ColorTTY, print_total_cfg - -color_tty = ColorTTY() - -MISC_CONFIG = { - "architecture": "", - "max_iters": "", - "train_feed": "", - "eval_feed": "", - "test_feed": "", - "pretrain_weights": "", - "save_dir": "", - "weights": "", - "metric": "", - "map_type": "11point", - "log_smooth_window": 20, - "snapshot_iter": 10000, - "log_iter": 20, - "use_gpu": True, - "finetune_exclude_pretrained_params": "", -} - - -def dump_config(module, minimal=False): - args = module.schema.values() - if minimal: - args = [arg for arg in args if not arg.has_default()] - return yaml.dump( - { - module.name: { - arg.name: arg.default if arg.has_default() else "" - for arg in args - } - }, - default_flow_style=False, - default_style='') - - -def list_modules(**kwargs): - target_category = kwargs['category'] - module_schema = 
get_registered_modules() - module_by_category = {} - - for schema in module_schema.values(): - category = schema.category - if target_category is not None and schema.category != target_category: - continue - if category not in module_by_category: - module_by_category[category] = [schema] - else: - module_by_category[category].append(schema) - - for cat, modules in module_by_category.items(): - print("Available modules in the category '{}':".format(cat)) - print("") - max_len = max([len(mod.name) for mod in modules]) - for mod in modules: - print(color_tty.green(mod.name.ljust(max_len)), - mod.doc.split('\n')[0]) - print("") - - -def help_module(**kwargs): - schema = get_registered_modules()[kwargs['module']] - - doc = schema.doc is None and "Not documented" or "{}".format(schema.doc) - func_args = {arg.name: arg.doc for arg in schema.schema.values()} - max_len = max([len(k) for k in func_args.keys()]) - opts = "\n".join([ - "{} {}".format(color_tty.green(k.ljust(max_len)), v) - for k, v in func_args.items() - ]) - template = dump_config(schema) - print("{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}\n{}\n".format( - color_tty.bold(color_tty.blue("MODULE DESCRIPTION:")), - doc, - color_tty.bold(color_tty.blue("MODULE OPTIONS:")), - opts, - color_tty.bold(color_tty.blue("CONFIGURATION TEMPLATE:")), - template, - color_tty.bold(color_tty.blue("COMMAND LINE OPTIONS:")), )) - for arg in schema.schema.values(): - print("--opt {}.{}={}".format(schema.name, arg.name, - dump_value(arg.default) - if arg.has_default() else "")) - - -def generate_config(**kwargs): - minimal = kwargs['minimal'] - modules = kwargs['modules'] - module_schema = get_registered_modules() - visited = [] - schema = [] - - def walk(m): - if m in visited: - return - s = module_schema[m] - schema.append(s) - visited.append(m) - - for mod in modules: - walk(mod) - - # XXX try to be smart about when to add header, - # if any "architecture" module, is included, head will be added as well - if any([getattr(m, 'category', 
None) == 'architecture' for m in schema]): - # XXX for ordered printing - header = "" - for k, v in MISC_CONFIG.items(): - header += yaml.dump( - { - k: v - }, default_flow_style=False, default_style='') - print(header) - - for s in schema: - print(dump_config(s, minimal)) - - -# FIXME this is pretty hackish, maybe implement a custom YAML printer? -def analyze_config(**kwargs): - config = load_config(kwargs['file']) - print_total_cfg(config) - - -if __name__ == '__main__': - argv = sys.argv[1:] - - parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter) - subparsers = parser.add_subparsers(help='Supported Commands') - list_parser = subparsers.add_parser("list", help="list available modules") - help_parser = subparsers.add_parser( - "help", help="show detail options for module") - generate_parser = subparsers.add_parser( - "generate", help="generate configuration template") - analyze_parser = subparsers.add_parser( - "analyze", help="analyze configuration file") - - list_parser.set_defaults(func=list_modules) - help_parser.set_defaults(func=help_module) - generate_parser.set_defaults(func=generate_config) - analyze_parser.set_defaults(func=analyze_config) - - list_group = list_parser.add_mutually_exclusive_group() - list_group.add_argument( - "-c", - "--category", - type=str, - default=None, - help="list modules for ") - - help_parser.add_argument( - "module", - help="module to show info for", - choices=list(get_registered_modules().keys())) - - generate_parser.add_argument( - "modules", - nargs='+', - help="include these module in generated configuration template", - choices=list(get_registered_modules().keys())) - generate_group = generate_parser.add_mutually_exclusive_group() - generate_group.add_argument( - "--minimal", action='store_true', help="only include required options") - generate_group.add_argument( - "--full", - action='store_false', - dest='minimal', - help="include all options") - - analyze_parser.add_argument("file", help="configuration 
file to analyze") - - if len(sys.argv) < 2: - parser.print_help() - sys.exit(1) - - args = parser.parse_args(argv) - if hasattr(args, 'func'): - args.func(**vars(args)) diff --git a/PaddleCV/PaddleDetection/tools/eval.py b/PaddleCV/PaddleDetection/tools/eval.py deleted file mode 100644 index 15692d3dd8b13789faaaa6a9a0bb3ed9385f37a6..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/eval.py +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. 
-set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -import paddle.fluid as fluid - -from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results -import ppdet.utils.checkpoint as checkpoint -from ppdet.utils.check import check_gpu -from ppdet.modeling.model_input import create_feed -from ppdet.data.data_feed import create_reader -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.utils.cli import print_total_cfg -from ppdet.utils.cli import ArgsParser - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def main(): - """ - Main evaluate function - """ - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - print_total_cfg(cfg) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) - - # define executor - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - # build program - model = create(main_arch) - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - pyreader, feed_vars = create_feed(eval_feed) - if multi_scale_test is None: - fetches = model.eval(feed_vars) - else: - fetches = model.eval(feed_vars, multi_scale_test) - eval_prog = eval_prog.clone(True) - reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - pyreader.decorate_sample_list_generator(reader, place) - - # eval already exists json file - if FLAGS.json_eval: - logger.info( - "In json_eval mode, 
PaddleDetection will evaluate json files in " - "output_eval directly. And proposal.json, bbox.json and mask.json " - "will be detected by default.") - json_eval_results( - eval_feed, cfg.metric, json_directory=FLAGS.output_eval) - return - - compile_program = fluid.compiler.CompiledProgram( - eval_prog).with_data_parallel() - - # load model - exe.run(startup_prog) - if 'weights' in cfg: - checkpoint.load_params(exe, eval_prog, cfg.weights) - - assert cfg.metric in ['COCO', 'VOC'], \ - "unknown metric type {}".format(cfg.metric) - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - - keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys) - - # whether output bbox is normalized in model output layer - is_bbox_normalized = False - if hasattr(model, 'is_bbox_normalized') and \ - callable(model.is_bbox_normalized): - is_bbox_normalized = model.is_bbox_normalized() - - sub_eval_prog = None - sub_keys = None - sub_values = None - # build sub-program - if 'Mask' in main_arch and multi_scale_test: - sub_eval_prog = fluid.Program() - with fluid.program_guard(sub_eval_prog, startup_prog): - with fluid.unique_name.guard(): - _, feed_vars = create_feed( - eval_feed, use_pyreader=False, sub_prog_feed=True) - sub_fetches = model.eval( - feed_vars, multi_scale_test, mask_branch=True) - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog, - extra_keys) - sub_eval_prog = sub_eval_prog.clone(True) - - if 'weights' in cfg: - checkpoint.load_params(exe, sub_eval_prog, cfg.weights) - - results = eval_run(exe, compile_program, pyreader, keys, values, cls, cfg, - sub_eval_prog, sub_keys, sub_values) - - # evaluation - resolution = None - if 'mask' in results[0]: - resolution = 
model.mask_head.resolution - # if map_type not set, use default 11point, only use in VOC eval - map_type = cfg.map_type if 'map_type' in cfg else '11point' - eval_results(results, eval_feed, cfg.metric, cfg.num_classes, resolution, - is_bbox_normalized, FLAGS.output_eval, map_type) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "--json_eval", - action='store_true', - default=False, - help="Whether to re eval with already exists bbox.json or mask.json") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - parser.add_argument( - "-f", - "--output_eval", - default=None, - type=str, - help="Evaluation file directory, default is current directory.") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/tools/export_model.py b/PaddleCV/PaddleDetection/tools/export_model.py deleted file mode 100644 index b0c9edac316220ca2b752fad05eec5437f698de8..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/export_model.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from paddle import fluid - -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.modeling.model_input import create_feed -from ppdet.utils.cli import ArgsParser -import ppdet.utils.checkpoint as checkpoint - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def prune_feed_vars(feeded_var_names, target_vars, prog): - """ - Filter out feed variables which are not in program, - pruned feed variables are only used in post processing - on model output, which are not used in program, such - as im_id to identify image order, im_shape to clip bbox - in image. - """ - exist_var_names = [] - prog = prog.clone() - prog = prog._prune(targets=target_vars) - global_block = prog.global_block() - for name in feeded_var_names: - try: - v = global_block.var(name) - exist_var_names.append(str(v.name)) - except Exception: - logger.info('save_inference_model pruned unused feed ' - 'variables {}'.format(name)) - pass - return exist_var_names - - -def save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog): - cfg_name = os.path.basename(FLAGS.config).split('.')[0] - save_dir = os.path.join(FLAGS.output_dir, cfg_name) - feed_var_names = [var.name for var in feed_vars.values()] - target_vars = list(test_fetches.values()) - feed_var_names = prune_feed_vars(feed_var_names, target_vars, infer_prog) - logger.info("Export inference model to {}, input: {}, output: " - "{}...".format(save_dir, feed_var_names, - [str(var.name) for var in target_vars])) - fluid.io.save_inference_model( - save_dir, - feeded_var_names=feed_var_names, - target_vars=target_vars, - executor=exe, - main_program=infer_prog, - params_filename="__params__") - - -def main(): - cfg = load_config(FLAGS.config) - - if 'architecture' in cfg: - 
main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - - if 'test_feed' not in cfg: - test_feed = create(main_arch + 'TestFeed') - else: - test_feed = create(cfg.test_feed) - - # Use CPU for exporting inference model instead of GPU - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model = create(main_arch) - - startup_prog = fluid.Program() - infer_prog = fluid.Program() - with fluid.program_guard(infer_prog, startup_prog): - with fluid.unique_name.guard(): - _, feed_vars = create_feed(test_feed, use_pyreader=False) - test_fetches = model.test(feed_vars) - infer_prog = infer_prog.clone(True) - - exe.run(startup_prog) - checkpoint.load_params(exe, infer_prog, cfg.weights) - - save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "--output_dir", - type=str, - default="output", - help="Directory for storing the output model files.") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/tools/face_eval.py b/PaddleCV/PaddleDetection/tools/face_eval.py deleted file mode 100644 index f74d5ba431e2933e346d8430e9ba9b2ad20af170..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/face_eval.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import paddle.fluid as fluid -import numpy as np -from PIL import Image -from collections import OrderedDict - -import ppdet.utils.checkpoint as checkpoint -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -from ppdet.utils.widerface_eval_utils import get_shrink, bbox_vote, \ - save_widerface_bboxes, save_fddb_bboxes, to_chw_bgr -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def face_img_process(image, - mean=[104., 117., 123.], - std=[127.502231, 127.502231, 127.502231]): - img = np.array(image) - img = to_chw_bgr(img) - img = img.astype('float32') - img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32') - img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32') - img = [img] - img = np.array(img) - return img - - -def face_eval_run(exe, - compile_program, - fetches, - img_root_dir, - gt_file, - pred_dir='output/pred', - eval_mode='widerface', - multi_scale=False): - # load ground truth files - with open(gt_file, 'r') as f: - gt_lines = f.readlines() - imid2path = [] - pos_gt = 0 - while pos_gt < len(gt_lines): - name_gt = gt_lines[pos_gt].strip('\n\t').split()[0] - imid2path.append(name_gt) - pos_gt += 1 - n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0]) - pos_gt += 1 + n_gt - logger.info('The ground truth file load {} images'.format(len(imid2path))) - - dets_dist = OrderedDict() - for iter_id, im_path in enumerate(imid2path): - image_path = os.path.join(img_root_dir, im_path) - if eval_mode == 'fddb': - image_path += '.jpg' - image = Image.open(image_path).convert('RGB') - if multi_scale: - shrink, max_shrink = get_shrink(image.size[1], 
image.size[0]) - det0 = detect_face(exe, compile_program, fetches, image, shrink) - det1 = flip_test(exe, compile_program, fetches, image, shrink) - [det2, det3] = multi_scale_test(exe, compile_program, fetches, image, - max_shrink) - det4 = multi_scale_test_pyramid(exe, compile_program, fetches, image, - max_shrink) - det = np.row_stack((det0, det1, det2, det3, det4)) - dets = bbox_vote(det) - else: - dets = detect_face(exe, compile_program, fetches, image, 1) - if eval_mode == 'widerface': - save_widerface_bboxes(image_path, dets, pred_dir) - else: - dets_dist[im_path] = dets - if iter_id % 100 == 0: - logger.info('Test iter {}'.format(iter_id)) - if eval_mode == 'fddb': - save_fddb_bboxes(dets_dist, pred_dir) - logger.info("Finish evaluation.") - - -def detect_face(exe, compile_program, fetches, image, shrink): - image_shape = [3, image.size[1], image.size[0]] - if shrink != 1: - h, w = int(image_shape[1] * shrink), int(image_shape[2] * shrink) - image = image.resize((w, h), Image.ANTIALIAS) - image_shape = [3, h, w] - - img = face_img_process(image) - detection, = exe.run(compile_program, - feed={'image': img}, - fetch_list=[fetches['bbox']], - return_numpy=False) - detection = np.array(detection) - # layout: xmin, ymin, xmax. 
ymax, score - if np.prod(detection.shape) == 1: - logger.info("No face detected") - return np.array([[0, 0, 0, 0, 0]]) - det_conf = detection[:, 1] - det_xmin = image_shape[2] * detection[:, 2] / shrink - det_ymin = image_shape[1] * detection[:, 3] / shrink - det_xmax = image_shape[2] * detection[:, 4] / shrink - det_ymax = image_shape[1] * detection[:, 5] / shrink - - det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf)) - return det - - -def flip_test(exe, compile_program, fetches, image, shrink): - img = image.transpose(Image.FLIP_LEFT_RIGHT) - det_f = detect_face(exe, compile_program, fetches, img, shrink) - det_t = np.zeros(det_f.shape) - # image.size: [width, height] - det_t[:, 0] = image.size[0] - det_f[:, 2] - det_t[:, 1] = det_f[:, 1] - det_t[:, 2] = image.size[0] - det_f[:, 0] - det_t[:, 3] = det_f[:, 3] - det_t[:, 4] = det_f[:, 4] - return det_t - - -def multi_scale_test(exe, compile_program, fetches, image, max_shrink): - # Shrink detecting is only used to detect big faces - st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink - det_s = detect_face(exe, compile_program, fetches, image, st) - index = np.where( - np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) - > 30)[0] - det_s = det_s[index, :] - # Enlarge one times - bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2 - det_b = detect_face(exe, compile_program, fetches, image, bt) - - # Enlarge small image x times for small faces - if max_shrink > 2: - bt *= 2 - while bt < max_shrink: - det_b = np.row_stack((det_b, detect_face(exe, compile_program, - fetches, image, bt))) - bt *= 2 - det_b = np.row_stack((det_b, detect_face(exe, compile_program, fetches, - image, max_shrink))) - - # Enlarged images are only used to detect small faces. - if bt > 1: - index = np.where( - np.minimum(det_b[:, 2] - det_b[:, 0] + 1, - det_b[:, 3] - det_b[:, 1] + 1) < 100)[0] - det_b = det_b[index, :] - # Shrinked images are only used to detect big faces. 
- else: - index = np.where( - np.maximum(det_b[:, 2] - det_b[:, 0] + 1, - det_b[:, 3] - det_b[:, 1] + 1) > 30)[0] - det_b = det_b[index, :] - return det_s, det_b - - -def multi_scale_test_pyramid(exe, compile_program, fetches, image, max_shrink): - # Use image pyramids to detect faces - det_b = detect_face(exe, compile_program, fetches, image, 0.25) - index = np.where( - np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) - > 30)[0] - det_b = det_b[index, :] - - st = [0.75, 1.25, 1.5, 1.75] - for i in range(len(st)): - if st[i] <= max_shrink: - det_temp = detect_face(exe, compile_program, fetches, image, st[i]) - # Enlarged images are only used to detect small faces. - if st[i] > 1: - index = np.where( - np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1, - det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0] - det_temp = det_temp[index, :] - # Shrinked images are only used to detect big faces. - else: - index = np.where( - np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1, - det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0] - det_temp = det_temp[index, :] - det_b = np.row_stack((det_b, det_temp)) - return det_b - - -def main(): - """ - Main evaluate function - """ - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - # define executor - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - # build program - model = create(main_arch) - startup_prog = fluid.Program() - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - _, feed_vars = create_feed(eval_feed, use_pyreader=False) - fetches = model.eval(feed_vars) 
- - eval_prog = eval_prog.clone(True) - - # load model - exe.run(startup_prog) - if 'weights' in cfg: - checkpoint.load_params(exe, eval_prog, cfg.weights) - - assert cfg.metric in ['WIDERFACE'], \ - "unknown metric type {}".format(cfg.metric) - - annotation_file = getattr(eval_feed.dataset, 'annotation', None) - dataset_dir = FLAGS.dataset_dir if FLAGS.dataset_dir else \ - getattr(eval_feed.dataset, 'dataset_dir', None) - img_root_dir = dataset_dir - if FLAGS.eval_mode == "widerface": - image_dir = getattr(eval_feed.dataset, 'image_dir', None) - img_root_dir = os.path.join(dataset_dir, image_dir) - gt_file = os.path.join(dataset_dir, annotation_file) - pred_dir = FLAGS.output_eval if FLAGS.output_eval else 'output/pred' - face_eval_run( - exe, - eval_prog, - fetches, - img_root_dir, - gt_file, - pred_dir=pred_dir, - eval_mode=FLAGS.eval_mode, - multi_scale=FLAGS.multi_scale) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - help="Dataset path, same as DataFeed.dataset.dataset_dir") - parser.add_argument( - "-f", - "--output_eval", - default=None, - type=str, - help="Evaluation file directory, default is current directory.") - parser.add_argument( - "-e", - "--eval_mode", - default="widerface", - type=str, - help="Evaluation mode, include `widerface` and `fddb`, default is `widerface`." - ) - parser.add_argument( - "--multi_scale", - action='store_true', - default=False, - help="If True it will select `multi_scale` evaluation. Default is `False`, it will select `single-scale` evaluation.") - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/tools/infer.py b/PaddleCV/PaddleDetection/tools/infer.py deleted file mode 100644 index ed10814846037dcabdb301e244370848c647a61b..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/infer.py +++ /dev/null @@ -1,269 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import glob - -import numpy as np -from PIL import Image - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. -set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid - -from ppdet.utils.cli import print_total_cfg -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.modeling.model_input import create_feed -from ppdet.data.data_feed import create_reader - -from ppdet.utils.eval_utils import parse_fetches -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -from ppdet.utils.visualizer import visualize_results -import ppdet.utils.checkpoint as checkpoint - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def get_save_image_name(output_dir, image_path): - """ - Get save image name from source image path. 
- """ - if not os.path.exists(output_dir): - os.makedirs(output_dir) - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - return os.path.join(output_dir, "{}".format(name)) + ext - - -def get_test_images(infer_dir, infer_img): - """ - Get image path list in TEST mode - """ - assert infer_img is not None or infer_dir is not None, \ - "--infer_img or --infer_dir should be set" - assert infer_img is None or os.path.isfile(infer_img), \ - "{} is not a file".format(infer_img) - assert infer_dir is None or os.path.isdir(infer_dir), \ - "{} is not a directory".format(infer_dir) - images = [] - - # infer_img has a higher priority - if infer_img and os.path.isfile(infer_img): - images.append(infer_img) - return images - - infer_dir = os.path.abspath(infer_dir) - assert os.path.isdir(infer_dir), \ - "infer_dir {} is not a directory".format(infer_dir) - exts = ['jpg', 'jpeg', 'png', 'bmp'] - exts += [ext.upper() for ext in exts] - for ext in exts: - images.extend(glob.glob('{}/*.{}'.format(infer_dir, ext))) - - assert len(images) > 0, "no image found in {}".format(infer_dir) - logger.info("Found {} inference images in total.".format(len(images))) - - return images - - -def main(): - cfg = load_config(FLAGS.config) - - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - print_total_cfg(cfg) - - if 'test_feed' not in cfg: - test_feed = create(main_arch + 'TestFeed') - else: - test_feed = create(cfg.test_feed) - - test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img) - test_feed.dataset.add_images(test_images) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - model = create(main_arch) - - startup_prog = fluid.Program() - infer_prog = fluid.Program() - with fluid.program_guard(infer_prog, 
startup_prog): - with fluid.unique_name.guard(): - _, feed_vars = create_feed(test_feed, use_pyreader=False) - test_fetches = model.test(feed_vars) - infer_prog = infer_prog.clone(True) - - reader = create_reader(test_feed) - feeder = fluid.DataFeeder(place=place, feed_list=feed_vars.values()) - - exe.run(startup_prog) - if cfg.weights: - checkpoint.load_params(exe, infer_prog, cfg.weights) - - # parse infer fetches - assert cfg.metric in ['COCO', 'VOC', 'WIDERFACE'], \ - "unknown metric type {}".format(cfg.metric) - extra_keys = [] - if cfg['metric'] == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE': - extra_keys = ['im_id', 'im_shape'] - keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys) - - # parse dataset category - if cfg.metric == 'COCO': - from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info - if cfg.metric == "VOC": - from ppdet.utils.voc_eval import bbox2out, get_category_info - if cfg.metric == "WIDERFACE": - from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info - - anno_file = getattr(test_feed.dataset, 'annotation', None) - with_background = getattr(test_feed, 'with_background', True) - use_default_label = getattr(test_feed, 'use_default_label', False) - clsid2catid, catid2name = get_category_info(anno_file, with_background, - use_default_label) - - # whether output bbox is normalized in model output layer - is_bbox_normalized = False - if hasattr(model, 'is_bbox_normalized') and \ - callable(model.is_bbox_normalized): - is_bbox_normalized = model.is_bbox_normalized() - - # use tb-paddle to log image - if FLAGS.use_tb: - from tb_paddle import SummaryWriter - tb_writer = SummaryWriter(FLAGS.tb_log_dir) - tb_image_step = 0 - tb_image_frame = 0 # each frame can display ten pictures at most. 
- - imid2path = reader.imid2path - for iter_id, data in enumerate(reader()): - outs = exe.run(infer_prog, - feed=feeder.feed(data), - fetch_list=values, - return_numpy=False) - res = { - k: (np.array(v), v.recursive_sequence_lengths()) - for k, v in zip(keys, outs) - } - logger.info('Infer iter {}'.format(iter_id)) - - bbox_results = None - mask_results = None - if 'bbox' in res: - bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) - if 'mask' in res: - mask_results = mask2out([res], clsid2catid, - model.mask_head.resolution) - - # visualize result - im_ids = res['im_id'][0] - for im_id in im_ids: - image_path = imid2path[int(im_id)] - image = Image.open(image_path).convert('RGB') - - # use tb-paddle to log original image - if FLAGS.use_tb: - original_image_np = np.array(image) - tb_writer.add_image( - "original/frame_{}".format(tb_image_frame), - original_image_np, - tb_image_step, - dataformats='HWC') - - image = visualize_results(image, - int(im_id), catid2name, - FLAGS.draw_threshold, bbox_results, - mask_results) - - # use tb-paddle to log image with bbox - if FLAGS.use_tb: - infer_image_np = np.array(image) - tb_writer.add_image( - "bbox/frame_{}".format(tb_image_frame), - infer_image_np, - tb_image_step, - dataformats='HWC') - tb_image_step += 1 - if tb_image_step % 10 == 0: - tb_image_step = 0 - tb_image_frame += 1 - - save_name = get_save_image_name(FLAGS.output_dir, image_path) - logger.info("Detection bbox results save in {}".format(save_name)) - image.save(save_name, quality=95) - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "--infer_dir", - type=str, - default=None, - help="Directory for images to perform inference on.") - parser.add_argument( - "--infer_img", - type=str, - default=None, - help="Image path, has higher priority over --infer_dir") - parser.add_argument( - "--output_dir", - type=str, - default="output", - help="Directory for storing the output visualization files.") - parser.add_argument( - 
"--draw_threshold", - type=float, - default=0.5, - help="Threshold to reserve the result for visualization.") - parser.add_argument( - "--use_tb", - type=bool, - default=False, - help="whether to record the data to Tensorboard.") - parser.add_argument( - '--tb_log_dir', - type=str, - default="tb_log_dir/image", - help='Tensorboard logging directory for image.') - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/PaddleDetection/tools/train.py b/PaddleCV/PaddleDetection/tools/train.py deleted file mode 100644 index 6d04c665ecbae873a043624a80661c385194fe9e..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/tools/train.py +++ /dev/null @@ -1,376 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import numpy as np -import random -import datetime -from collections import deque - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be set before -# `import paddle`. Otherwise, it would not take any effect. 
-set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -from paddle import fluid -from paddle.fluid import profiler - -from ppdet.experimental import mixed_precision_context -from ppdet.core.workspace import load_config, merge_config, create -from ppdet.data.data_feed import create_reader - -from ppdet.utils.cli import print_total_cfg -from ppdet.utils import dist_utils -from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results -from ppdet.utils.stats import TrainingStats -from ppdet.utils.cli import ArgsParser -from ppdet.utils.check import check_gpu -import ppdet.utils.checkpoint as checkpoint -from ppdet.modeling.model_input import create_feed - -import logging -FORMAT = '%(asctime)s-%(levelname)s: %(message)s' -logging.basicConfig(level=logging.INFO, format=FORMAT) -logger = logging.getLogger(__name__) - - -def main(): - env = os.environ - FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env - if FLAGS.dist: - trainer_id = int(env['PADDLE_TRAINER_ID']) - local_seed = (99 + trainer_id) - random.seed(local_seed) - np.random.seed(local_seed) - - if FLAGS.enable_ce: - random.seed(0) - np.random.seed(0) - - cfg = load_config(FLAGS.config) - if 'architecture' in cfg: - main_arch = cfg.architecture - else: - raise ValueError("'architecture' not specified in config file.") - - merge_config(FLAGS.opt) - - if 'log_iter' not in cfg: - cfg.log_iter = 20 - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - if not FLAGS.dist or trainer_id == 0: - print_total_cfg(cfg) - - if cfg.use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int(os.environ.get('CPU_NUM', 1)) - - if 'train_feed' not in cfg: - train_feed = create(main_arch + 'TrainFeed') - else: - train_feed = create(cfg.train_feed) - - if FLAGS.eval: - if 'eval_feed' not in cfg: - eval_feed = create(main_arch + 'EvalFeed') - else: - eval_feed = create(cfg.eval_feed) - - if 
'FLAGS_selected_gpus' in env: - device_id = int(env['FLAGS_selected_gpus']) - else: - device_id = 0 - place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - lr_builder = create('LearningRate') - optim_builder = create('OptimizerBuilder') - - # build program - startup_prog = fluid.Program() - train_prog = fluid.Program() - if FLAGS.enable_ce: - startup_prog.random_seed = 1000 - train_prog.random_seed = 1000 - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - train_pyreader, feed_vars = create_feed(train_feed) - - if FLAGS.fp16: - assert (getattr(model.backbone, 'norm_type', None) - != 'affine_channel'), \ - '--fp16 currently does not support affine channel, ' \ - ' please modify backbone settings to use batch norm' - - with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx: - train_fetches = model.train(feed_vars) - - loss = train_fetches['loss'] - if FLAGS.fp16: - loss *= ctx.get_loss_scale_var() - lr = lr_builder() - optimizer = optim_builder(lr) - optimizer.minimize(loss) - if FLAGS.fp16: - loss /= ctx.get_loss_scale_var() - - # parse train fetches - train_keys, train_values, _ = parse_fetches(train_fetches) - train_values.append(lr) - - if FLAGS.eval: - eval_prog = fluid.Program() - with fluid.program_guard(eval_prog, startup_prog): - with fluid.unique_name.guard(): - model = create(main_arch) - eval_pyreader, feed_vars = create_feed(eval_feed) - fetches = model.eval(feed_vars) - eval_prog = eval_prog.clone(True) - - eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir) - eval_pyreader.decorate_sample_list_generator(eval_reader, place) - - # parse eval fetches - extra_keys = [] - if cfg.metric == 'COCO': - extra_keys = ['im_info', 'im_id', 'im_shape'] - if cfg.metric == 'VOC': - extra_keys = ['gt_box', 'gt_label', 'is_difficult'] - if cfg.metric == 'WIDERFACE': - extra_keys = ['im_id', 'im_shape', 'gt_box'] - eval_keys, 
eval_values, eval_cls = parse_fetches(fetches, eval_prog, - extra_keys) - - # compile program for multi-devices - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_optimizer_ops = False - build_strategy.fuse_elewise_add_act_ops = True - # only enable sync_bn in multi GPU devices - sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn' - build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \ - and cfg.use_gpu - - exec_strategy = fluid.ExecutionStrategy() - # iteration number when CompiledProgram tries to drop local execution scopes. - # Set it to be 1 to save memory usages, so that unused variables in - # local execution scopes can be deleted after each iteration. - exec_strategy.num_iteration_per_drop_scope = 1 - if FLAGS.dist: - dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog, - train_prog) - exec_strategy.num_threads = 1 - - exe.run(startup_prog) - compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - if FLAGS.eval: - compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog) - - fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel' - - ignore_params = cfg.finetune_exclude_pretrained_params \ - if 'finetune_exclude_pretrained_params' in cfg else [] - - start_iter = 0 - if FLAGS.resume_checkpoint: - checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint) - start_iter = checkpoint.global_step() - elif cfg.pretrain_weights and fuse_bn and not ignore_params: - checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights) - elif cfg.pretrain_weights: - checkpoint.load_params( - exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params) - - train_reader = create_reader(train_feed, (cfg.max_iters - start_iter) * - devices_num, FLAGS.dataset_dir) - train_pyreader.decorate_sample_list_generator(train_reader, place) - - # whether output bbox is normalized in 
model output layer - is_bbox_normalized = False - if hasattr(model, 'is_bbox_normalized') and \ - callable(model.is_bbox_normalized): - is_bbox_normalized = model.is_bbox_normalized() - - # if map_type not set, use default 11point, only use in VOC eval - map_type = cfg.map_type if 'map_type' in cfg else '11point' - - train_stats = TrainingStats(cfg.log_smooth_window, train_keys) - train_pyreader.start() - start_time = time.time() - end_time = time.time() - - cfg_name = os.path.basename(FLAGS.config).split('.')[0] - save_dir = os.path.join(cfg.save_dir, cfg_name) - time_stat = deque(maxlen=cfg.log_smooth_window) - best_box_ap_list = [0.0, 0] #[map, iter] - - # use tb-paddle to log data - if FLAGS.use_tb: - from tb_paddle import SummaryWriter - tb_writer = SummaryWriter(FLAGS.tb_log_dir) - tb_loss_step = 0 - tb_mAP_step = 0 - - for it in range(start_iter, cfg.max_iters): - start_time = end_time - end_time = time.time() - time_stat.append(end_time - start_time) - time_cost = np.mean(time_stat) - eta_sec = (cfg.max_iters - it) * time_cost - eta = str(datetime.timedelta(seconds=int(eta_sec))) - outs = exe.run(compiled_train_prog, fetch_list=train_values) - stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])} - - # use tb-paddle to log loss - if FLAGS.use_tb: - if it % cfg.log_iter == 0: - for loss_name, loss_value in stats.items(): - tb_writer.add_scalar(loss_name, loss_value, tb_loss_step) - tb_loss_step += 1 - - train_stats.update(stats) - logs = train_stats.log() - if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0): - strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( - it, np.mean(outs[-1]), logs, time_cost, eta) - logger.info(strs) - - #only for continuous evaluation - if FLAGS.enable_ce and it == cfg.max_iters - 1: - print("kpis\t{}_train_loss\t{}".format(cfg.architecture, stats['loss'])) - print("kpis\t{}_train_time\t{}".format(cfg.architecture, time_cost)) - - # profiler tools, used for benchmark - if 
FLAGS.is_profiler and it == 5: - profiler.start_profiler("All") - elif FLAGS.is_profiler and it == 10: - profiler.stop_profiler("total", FLAGS.profiler_path) - return - - if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \ - and (not FLAGS.dist or trainer_id == 0): - save_name = str(it) if it != cfg.max_iters - 1 else "model_final" - checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name)) - - if FLAGS.eval: - # evaluation - results = eval_run(exe, compiled_eval_prog, eval_pyreader, - eval_keys, eval_values, eval_cls) - resolution = None - if 'mask' in results[0]: - resolution = model.mask_head.resolution - box_ap_stats = eval_results( - results, eval_feed, cfg.metric, cfg.num_classes, resolution, - is_bbox_normalized, FLAGS.output_eval, map_type) - - # use tb_paddle to log mAP - if FLAGS.use_tb: - tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step) - tb_mAP_step += 1 - - if box_ap_stats[0] > best_box_ap_list[0]: - best_box_ap_list[0] = box_ap_stats[0] - best_box_ap_list[1] = it - checkpoint.save(exe, train_prog, - os.path.join(save_dir, "best_model")) - logger.info("Best test box ap: {}, in iter: {}".format( - best_box_ap_list[0], best_box_ap_list[1])) - - train_pyreader.reset() - - -if __name__ == '__main__': - parser = ArgsParser() - parser.add_argument( - "-r", - "--resume_checkpoint", - default=None, - type=str, - help="Checkpoint path for resuming training.") - parser.add_argument( - "--fp16", - action='store_true', - default=False, - help="Enable mixed precision training.") - parser.add_argument( - "--loss_scale", - default=8., - type=float, - help="Mixed precision training loss scale.") - parser.add_argument( - "--eval", - action='store_true', - default=False, - help="Whether to perform evaluation in train") - parser.add_argument( - "--output_eval", - default=None, - type=str, - help="Evaluation directory, default is current directory.") - parser.add_argument( - "-d", - "--dataset_dir", - default=None, - type=str, - 
help="Dataset path, same as DataFeed.dataset.dataset_dir") - parser.add_argument( - "--use_tb", - type=bool, - default=False, - help="whether to record the data to Tensorboard.") - parser.add_argument( - '--tb_log_dir', - type=str, - default="tb_log_dir/scalar", - help='Tensorboard logging directory for scalar.') - parser.add_argument( - '--enable_ce', - type=bool, - default=False, - help="If set True, enable continuous evaluation job." - "This flag is only used for internal test.") - - #NOTE:args for profiler tools, used for benchmark - parser.add_argument( - '--is_profiler', - type=int, - default=0, - help='The switch of profiler tools. (used for benchmark)') - parser.add_argument( - '--profiler_path', - type=str, - default="./", - help='The profiler output file path. (used for benchmark)') - FLAGS = parser.parse_args() - main() diff --git a/PaddleCV/Research/AGEchallenge/Classification/1. PrepareData.ipynb b/PaddleCV/Research/AGEchallenge/Classification/1. PrepareData.ipynb deleted file mode 100644 index 438786023ccd0e83bf29163079ab0c5b2f931e12..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/Classification/1. 
PrepareData.ipynb +++ /dev/null @@ -1,368 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Angle closure classification Baseline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## requirement install" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install xlrd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Zip File Extract\n", - "\n", - "Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!unzip -q ../datasets/Training100.zip -d ../datasets/\n", - "!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore Data & Train/Val split" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import csv\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import os, shutil\n", - "import pprint\n", - "import pandas as pd\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "xlsx_file_path = os.path.join(data_root_path, \"Training100_Location.xlsx\")\n", - "\n", - "# Load\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "label_file_path = os.path.join(data_root_path, \"train_cls.csv\")\n", - "\n", - "# Save\n", - "train_file_path = os.path.join(data_root_path, \"cls_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"cls_val_split.csv\")" - ] - }, - { - "cell_type": 
"code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ASOCT_NameLeft_LabelX1Y1Right_LabelX2Y2
0T0056-10.jpg1228.833656466.95960111870.803864451.592300
1T0047-06.jpg1207.935545525.93876411792.231404432.521881
2T0066-15.jpg0239.372633476.27392501899.775568501.007410
3T0025-15.jpg0177.708404545.65593501862.380363439.228928
4T0088-06.jpg0285.256170735.07601401884.122651767.858589
\n", - "
" - ], - "text/plain": [ - " ASOCT_Name Left_Label X1 Y1 Right_Label X2 \\\n", - "0 T0056-10.jpg 1 228.833656 466.959601 1 1870.803864 \n", - "1 T0047-06.jpg 1 207.935545 525.938764 1 1792.231404 \n", - "2 T0066-15.jpg 0 239.372633 476.273925 0 1899.775568 \n", - "3 T0025-15.jpg 0 177.708404 545.655935 0 1862.380363 \n", - "4 T0088-06.jpg 0 285.256170 735.076014 0 1884.122651 \n", - "\n", - " Y2 \n", - "0 451.592300 \n", - "1 432.521881 \n", - "2 501.007410 \n", - "3 439.228928 \n", - "4 767.858589 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xlsx_file = pd.read_excel(xlsx_file_path)\n", - "xlsx_file.to_csv(label_file_path, \n", - " index=False, columns=['ASOCT_Name', 'Left_Label', 'Right_Label'])\n", - "xlsx_file.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[['ASOCT_Name', 'Left_Label', 'Right_Label'],\n", - " ['T0056-10.jpg', '1', '1'],\n", - " ['T0047-06.jpg', '1', '1'],\n", - " ['T0066-15.jpg', '0', '0'],\n", - " ['T0025-15.jpg', '0', '0']]\n" - ] - } - ], - "source": [ - "data_list = []\n", - "\n", - "with open(label_file_path,'r') as f: \n", - " lines=csv.reader(f) \n", - " for key, line in enumerate(lines): \n", - " data_list.append(line)\n", - " \n", - "pprint.pprint(data_list[:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1280, 320]\n", - "[1280, 320]\n" - ] - } - ], - "source": [ - "# left, right\n", - "# negative sample (label==0): 1280, 1280\n", - "# positive sample (label==1): 320, 320\n", - "left_label_counter = [0, 0]\n", - "right_label_counter = [0, 0]\n", - "\n", - "for line in data_list[1:]:\n", - " file_name, l_label, r_label = line\n", - " left_label_counter[int(l_label)] += 1\n", - " right_label_counter[int(r_label)] += 1\n", - " \n", - 
"print(left_label_counter)\n", - "print(right_label_counter)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Left label == Right label, in **TRAINING SET**\n", - "for line in data_list[1:]:\n", - " file_name, l_label, r_label = line\n", - " if int(l_label) != int(r_label):\n", - " print(line)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Train/Val Split" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):\n", - " testee_list = list(set( [line[0].split(\"-\")[0] for line in data_list[1:]] ))\n", - " \n", - " # Split by patient id, prevent data leakage\n", - " val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)\n", - "\n", - " train_list = []\n", - " val_list = []\n", - " \n", - " for line in data_list[1:]:\n", - " file_name, _, _ = line\n", - " if file_name.split(\"-\")[0] in val_testee_idx:\n", - " val_list.append(line)\n", - " else:\n", - " train_list.append(line)\n", - " \n", - " return train_list, val_list" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1296\n", - "304\n" - ] - } - ], - "source": [ - "train_data_list, val_data_list = train_val_split(data_list)\n", - "print(len(train_data_list))\n", - "print(len(val_data_list))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Write to files\n", - "with open(train_file_path, \"w+\") as f:\n", - " for line in train_data_list:\n", - "# file_name, l_label, r_label = line\n", - " f.write(\"{},{},{}\\n\".format(*line))\n", - " \n", - "with open(val_file_path, \"w+\") as f:\n", - " for line in val_data_list:\n", - " f.write(\"{},{},{}\\n\".format(*line))" - ] - } - ], - 
"metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/PaddleCV/Research/AGEchallenge/Classification/2. Train.ipynb b/PaddleCV/Research/AGEchallenge/Classification/2. Train.ipynb deleted file mode 100644 index 11d2631e2a9bdf52ffeb4cd8ad590871069d90f4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/Classification/2. Train.ipynb +++ /dev/null @@ -1,558 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Angle closure classification Baseline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training\n", - "\n", - "- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`\n", - "- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`\n", - "- In training phase, we use standard ResNet34 with `sigmoid(fc(1))` output\n", - "- We split a single image into two parts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download ImageNet weight" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2019-08-06 13:36:07-- https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar\n", - "Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 
220.181.33.44, 220.181.33.43\n", - "Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|220.181.33.44|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 87470080 (83M) [application/x-tar]\n", - "Saving to: ‘../weights/ResNet34_pretrained.tar’\n", - "\n", - "ResNet34_pretrained 100%[===================>] 83.42M 1.80MB/s in 66s \n", - "\n", - "2019-08-06 13:37:13 (1.27 MB/s) - ‘../weights/ResNet34_pretrained.tar’ saved [87470080/87470080]\n", - "\n" - ] - } - ], - "source": [ - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "!rm ../weights/ResNet34_pretrained.tar \n", - "!rm -rf ../weights/ResNet34_pretrained\n", - "\n", - "!wget -P ../weights/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar \n", - "!tar xvf ../weights/ResNet34_pretrained.tar -C ../weights/ > /dev/null # silent\n", - "!rm ../weights/ResNet34_pretrained/fc*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Main Code" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time\n", - "from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from resnet import *" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"cls_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"cls_val_split.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 32 // 2 # image split * 2\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Data Loader" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Real time data augmentation in training\n", - "\n", - "def rotate_image(image, angle=90, scale=1.0):\n", - " '''\n", - " Rotate the image\n", - " :param image: image to be processed\n", - " :param angle: Rotation angle in degrees. 
Positive values mean counter-clockwise rotation (the coordinate origin is assumed to be the top-left corner).\n", - " :param scale: Isotropic scale factor.\n", - " '''\n", - " w = image.shape[1]\n", - " h = image.shape[0]\n", - " #rotate matrix\n", - " M = cv2.getRotationMatrix2D((w/2,h/2), angle, scale)\n", - " #rotate\n", - " image = cv2.warpAffine(image,M,(w,h))\n", - " return image\n", - "\n", - "def vflip_image(image):\n", - " return cv2.flip(image, flipCode=1)\n", - "\n", - "def crop_image(img, target_size, center):\n", - " \"\"\" crop_image \"\"\"\n", - " height, width = img.shape[:2]\n", - " size = target_size\n", - " if center == True:\n", - " w_start = (width - size) // 2\n", - " h_start = (height - size) // 2\n", - " else:\n", - " w_start = np.random.randint(0, width - size + 1)\n", - " h_start = np.random.randint(0, height - size + 1)\n", - " w_end = w_start + size\n", - " h_end = h_start + size\n", - " img = img[h_start:h_end, w_start:w_end, :]\n", - " return img\n", - "\n", - "def split_image(img):\n", - " rows,_,_ = img.shape\n", - " # left, right split\n", - " return [img[:, :rows, :], img[:, -rows:, :]]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# data reader and xmap wrapper to enable multiprocessing data load\n", - "\n", - "def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " single_img_path, l_label, r_label = line.split(\",\")\n", - " batch_data.append([single_img_path, int(l_label), int(r_label)])\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data, mode, rotate=True, flip=True):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " 
file, l_label, r_label = sample\n", - "\n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255\n", - " \n", - " img = np.concatenate(split_image(img), axis=-1) # concat at channel dim\n", - " img = cv2.resize(img, (256, 256))\n", - " \n", - " if mode == 'train':\n", - " img = crop_image(img, target_size=224, center=False)\n", - "# img = img + np.random.randn(*img.shape) * 0.3 / 255 \n", - " if rotate:\n", - " angle = np.random.randint(1, 30, size=1)\n", - " img = rotate_image(img, angle)\n", - " if flip and np.random.randint(0,2):\n", - " img = vflip_image(img)\n", - " else:\n", - " img = crop_image(img, target_size=224, center=True)\n", - " \n", - " img = img.transpose((2, 0, 1))\n", - "\n", - " batch_data.append((img[:3,:,:], l_label))\n", - " batch_data.append((img[3:,:,:], r_label))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False, mode='train'):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " mapper = functools.partial(process_batch_data, mode=mode)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "with open(train_file_path) as flist:\n", - " train_file_list = [os.path.join(image_path,line.strip()) for line in flist]\n", - "\n", - "with open(val_file_path) as flist:\n", - " val_file_list = [os.path.join(image_path,line.strip()) for line in flist]" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1296\n", - "304\n", - "../datasets/Training100/ASOCT_Image/T0047-06.jpg,1,1\n" - ] - } - ], - "source": [ - 
"print(len(train_file_list))\n", - "print(len(val_file_list))\n", - "\n", - "print(train_file_list[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.shuffle(train_file_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1072, 224]\n" - ] - } - ], - "source": [ - "# Class imbalance\n", - "classes_collaction = [0] * 2\n", - "for line in train_file_list:\n", - " file, c_l, c_r = line.split(\",\")\n", - " classes_collaction[int(c_l)] +=1\n", - " \n", - "print(classes_collaction)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataloader = data_loader(train_file_list, image_path, BATCH_SIZE, False, mode='train')\n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True, mode='val')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define model (compute graph)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 224, 224]\n", - " \n", - " model = ResNet34()\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " label = FL.data(name='label', shape=[1], dtype='int64')\n", - " \n", - " logit = model.net(input_feature, class_dim=1)\n", - " predict = FL.sigmoid(logit)\n", - "\n", - " reader = fluid.io.PyReader(feed_list=[input_feature, label], \n", - " capacity=64, iterable=True, use_double_buffer=True)\n", - "\n", - " cost = FL.log_loss(predict, FL.cast(label, \"float32\"), epsilon=1e-7)\n", - " loss = FL.mean(cost)\n", - "\n", - " accuracy = FL.mean(FL.cast(FL.equal(FL.cast(FL.round(predict),\"int64\"), label), \"float32\") )\n", - " \n", - " return [loss, accuracy, predict, reader]" - ] - }, - { - "cell_type": "code", - 
"execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def calc_auc_numpy(y_pred, y_true):\n", - " auc = roc_auc_score(y_true, y_pred)\n", - "\n", - " fpr, tpr, thresh = roc_curve(y_true, y_pred)\n", - " optimal_idx = np.argmax(tpr - fpr)\n", - " \n", - " print(\"Best Sensi: %1.4f\" % (tpr[optimal_idx]))\n", - " print(\"Best Speci: %1.4f\" % (1-fpr[optimal_idx]))\n", - " print(\"Best Thresh: %1.4f\" % (thresh[optimal_idx]))\n", - " \n", - " y_pred = (y_pred > 0.5).astype(np.int_)\n", - " print(confusion_matrix(y_true, y_pred))\n", - " \n", - " return auc" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "def train(use_cuda, params_dirname_prefix, pretrained_model=False, EPOCH_NUM=10):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " train_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - "\n", - " with fluid.program_guard(train_prog, startup_prog):\n", - " # fluid.unique_name.guard() to share parameters with test network\n", - " with fluid.unique_name.guard():\n", - " train_loss, train_acc, train_output, train_reader = network()\n", - " \n", - " optimizer = fluid.optimizer.Adam(learning_rate=1e-4)\n", - " optimizer.minimize(train_loss)\n", - " \n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_loss, val_acc, val_output, val_reader = network()\n", - "\n", - " val_prog = val_prog.clone(for_test=True)\n", - "\n", - " train_loss.persistable = True\n", - " train_acc.persistable = True\n", - " val_loss.persistable = True\n", - " val_acc.persistable = True\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def if_exist(var):\n", - " return 
os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=train_prog, predicate=if_exist)\n", - "\n", - " train_reader.decorate_sample_list_generator( train_dataloader, places=place )\n", - " val_reader.decorate_sample_list_generator( val_dataloader, places=place )\n", - "\n", - " # For training test cost\n", - " def train_test(val_prog, val_reader):\n", - " count = 0\n", - " accumulated = [0,0]\n", - " \n", - " prediction = []\n", - " label_values = []\n", - " \n", - " for tid, val_data in enumerate(val_reader()):\n", - " avg_cost_np = exe.run(\n", - " program=val_prog,\n", - " feed=val_data,\n", - " fetch_list=[val_loss, val_acc, val_output],\n", - " use_program_cache=True)\n", - " accumulated = [\n", - " x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)\n", - " ]\n", - " prediction.append(avg_cost_np[2])\n", - " label_values.append( np.array(val_data[0]['label']) )\n", - " count += 1\n", - "\n", - " prediction = np.concatenate(prediction, 0)\n", - " label_values = np.concatenate(label_values, 0)\n", - " \n", - " auc = calc_auc_numpy(prediction, label_values)\n", - " \n", - " return [x / count for x in accumulated], auc\n", - "\n", - " # main train loop.\n", - " def train_loop():\n", - " step = 0\n", - " best_auc = 0.\n", - "\n", - " for pass_id in range(EPOCH_NUM):\n", - " data_load_time = time.time()\n", - " for step_id, data_train in enumerate(train_reader()):\n", - " data_load_costtime = time.time() - data_load_time\n", - " start_time = time.time()\n", - " avg_loss_value = exe.run(\n", - " train_prog,\n", - " feed=data_train,\n", - " fetch_list=[train_loss, train_acc], \n", - " use_program_cache=True)\n", - " cost_time = time.time() - start_time\n", - " if step_id % 50 == 0:\n", - " print(\"Pass %d, Epoch %d, Cost %f, Acc %f, Time %f, LoadTime %f\" % (\n", - " step_id, pass_id, avg_loss_value[0], avg_loss_value[1], cost_time, data_load_costtime))\n", - " else:\n", - " pass\n", 
- " step += 1\n", - " data_load_time = time.time()\n", - "\n", - " metrics, auc = train_test(val_prog, val_reader)\n", - " avg_cost_test, accuracy_test = metrics\n", - " \n", - " print('Test with Epoch {0}, Loss {1:2.4}, Acc {2:2.4}, Auc {3:2.4}'.format(\n", - " pass_id, avg_cost_test, accuracy_test, auc))\n", - " \n", - " if auc >= best_auc:\n", - " best_data = [pass_id, avg_cost_test, accuracy_test, auc]\n", - " best_auc = auc\n", - " print(\"\\nBest AUC, Checkpoint Saved!\\n\")\n", - " if not os.path.isdir(params_dirname_prefix+\"_best/\"):\n", - " os.makedirs(params_dirname_prefix+\"_best/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_best/\", main_program=train_prog)\n", - "\n", - " if not os.path.isdir(params_dirname_prefix+\"_checkpoint/\"):\n", - " os.makedirs(params_dirname_prefix+\"_checkpoint/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_checkpoint/\", main_program=train_prog)\n", - " train_loop()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# download imagenet pretrain weight from:\n", - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "# remove ResNet34_pretrained/fc*\n", - "train(use_cuda=True, params_dirname_prefix=\"../weights/classify_weights\", \n", - " pretrained_model=\"../weights/ResNet34_pretrained\", EPOCH_NUM=20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git 
a/PaddleCV/Research/AGEchallenge/Classification/3. Inference.ipynb b/PaddleCV/Research/AGEchallenge/Classification/3. Inference.ipynb deleted file mode 100644 index d88a7f9335663790e68de313bd3b1bffb438dd69..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/Classification/3. Inference.ipynb +++ /dev/null @@ -1,329 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Angle closure classification Baseline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inference\n", - "\n", - "- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`\n", - "- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`\n", - "- In training phase, we use standard ResNet34 with `sigmoid(fc(1))` output\n", - "- We split a single image into two parts" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time\n", - "from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from resnet import *" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "\n", - "val_file_path = os.path.join(data_root_path, \"cls_val_split.csv\")\n", - "\n", - "output_file = \"./Classification_Results.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 32 // 2 # image split * 2\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Inference Data Loader" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Real time data augmentation\n", - "def crop_image(img, target_size, center):\n", - " \"\"\" crop_image \"\"\"\n", - " height, width = img.shape[:2]\n", - " size = target_size\n", - " if center == True:\n", - " w_start = (width - size) // 2\n", - " h_start = (height - size) // 2\n", - " else:\n", - " w_start = np.random.randint(0, width - size + 1)\n", - " h_start = np.random.randint(0, height - size + 1)\n", - " w_end = w_start + size\n", - " h_end = h_start + size\n", - " img = img[h_start:h_end, w_start:w_end, :]\n", - " return img\n", - "\n", - "def split_image(img):\n", - " rows,_,_ = img.shape\n", - " # left, right split\n", - " return [img[:, :rows, :], img[:, -rows:, :]]\n", - " \n", - "# data reader and xmap wrapper to enable multiprocessing data load\n", - "\n", - "def reader(img_path, file_list, 
batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " single_img_path, _, _ = line.split(\",\")\n", - " batch_data.append(single_img_path)\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " file = sample\n", - "\n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255\n", - " \n", - " img = np.concatenate(split_image(img), axis=-1) # concat at channel dim\n", - " img = cv2.resize(img, (256, 256))\n", - " \n", - " img = crop_image(img, target_size=224, center=True)\n", - " \n", - " img = img.transpose((2, 0, 1))\n", - "\n", - " batch_data.append((file, 0, img[:3,:,:]))\n", - " batch_data.append((file, 1, img[3:,:,:]))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " mapper = functools.partial(process_batch_data)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "with open(val_file_path) as flist:\n", - " val_file_list = [os.path.join(image_path,line.strip()) for line in flist]\n", - " \n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define model (compute graph)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 224, 224]\n", - " \n", - " model = ResNet34()\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " \n", - " logit = model.net(input_feature, class_dim=1)\n", - " predict = FL.sigmoid(logit)\n", - "\n", - " return predict" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "def inference(use_cuda, pretrained_model, threshold=0.5):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - "\n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_output = network()\n", - "\n", - " val_prog = val_prog.clone(for_test=True)\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def if_exist(var):\n", - " return os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=val_prog, predicate=if_exist)\n", - " \n", - " positive_ratio = 1. / (1. - threshold)\n", - " negative_ratio = 1. 
/ threshold\n", - " \n", - " result = {}\n", - " for tid, data in enumerate(val_dataloader()):\n", - " file_names, part_splits, val_datas = [],[],[]\n", - " for item in data:\n", - " file_names.append(item[0])\n", - " part_splits.append(item[1])\n", - " val_datas.append(item[2])\n", - " \n", - " batch_preds, = exe.run(\n", - " program=val_prog,\n", - " feed={\"pixel\":np.array(val_datas)},\n", - " fetch_list=[val_output],\n", - " use_program_cache=True)\n", - "\n", - " for file, part, pred in zip(file_names, part_splits, batch_preds[:,0]):\n", - " if pred >= threshold:\n", - " threshold_pred = (pred-threshold) * positive_ratio\n", - " else:\n", - " threshold_pred = (pred-threshold) * negative_ratio\n", - " if file not in result.keys():\n", - " result[file] = [0, 0]\n", - " result[file][part] = threshold_pred\n", - " return result\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "result = inference(True, \"../weights/classify_weights_best/\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "with open(output_file, \"w+\") as f:\n", - " f.write(\"{},{},{}\\n\".format(\"ASOCT_NAME\", \"LEFT_ANGLE_RESULTS\", \"RIGHT_ANGLE_RESULTS\"))\n", - " for file, pred_labels in result.items():\n", - " f.write(\"{},{},{}\\n\".format(file.split(\"/\")[-1], *pred_labels))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git 
a/PaddleCV/Research/AGEchallenge/Classification/README.md b/PaddleCV/Research/AGEchallenge/Classification/README.md deleted file mode 100644 index 9eb387c90ceafface85bc7c72ef431353e171af7..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/Classification/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Angle closure Glaucoma Evaluation Challenge -The goal of the challenge is to evaluate and compare automated algorithms for angle closure classification and localization of scleral spur (SS) points on a common dataset of AS-OCT images. We invite the medical image analysis community to participate by developing and testing existing and novel automated classification and segmentation methods. -More detail [AGE challenge](https://age.grand-challenge.org/Details/). - -## Angle closure classification task - -1. Prepare data - - * We assume that you have downloaded data(two zip files), and stored @ `../datasets/`. - * (Updated on August 5) Replace update files. - * We provide a demo about `zip file extract`, `xlsx reader`, `data structure explore` and `Train/Val split`. - -2. Train - - * We assume that you have downloaded data, extracted compressed files, and stored @ `../datasets/`. - * Based on PaddlePaddle and [ResNet34](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py), we provide a baseline about `pretrain weight download and load`, `datareader`, `computation graph of ResNet34 model`, `training` and `evaluation metrics`. - -3. Inference - - * We assume that you have downloaded data, extracted compressed files, and stored @ `../datasets/`. - * We assume that you store checkpoint files @ `../weights/` - * Based on PaddlePaddle and [ResNet34](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py), we provide a baseline about `inference` and `dump result to csv file`. 
diff --git a/PaddleCV/Research/AGEchallenge/Classification/resnet.py b/PaddleCV/Research/AGEchallenge/Classification/resnet.py deleted file mode 100644 index 3f705d40e9f71bbc9e2ee0164ab27c38bbb17530..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/Classification/resnet.py +++ /dev/null @@ -1,198 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - 
depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu',name="conv1") - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name="res"+str(block+2)+"a" - else: - conv_name="res"+str(block+2)+"b"+str(i) - else: - conv_name="res"+str(block+2)+chr(97+i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name="res"+str(block+2)+chr(97+i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block==i==0, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + 
'.conv2d.output.1') - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - name=bn_name+'.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance',) - - def shortcut(self, input, ch_out, stride, is_first, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name+"_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") - - short = self.shortcut(input, num_filters * 4, stride, is_first=False, name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name): - conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, - name=name+"_branch2a") - conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, act=None, - name=name+"_branch2b") - short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18(): - model = ResNet(layers=18) - return model - - -def ResNet34(): - model = ResNet(layers=34) - return model - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def 
ResNet152(): - model = ResNet(layers=152) - return model diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/1. PrepareData.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationFCN/1. PrepareData.ipynb deleted file mode 100644 index 4f963cc4ed9374b7ed80ddaab7496beaff390f4c..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationFCN/1. PrepareData.ipynb +++ /dev/null @@ -1,652 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (FCN)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## requirement install" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install xlrd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Zip File Extract\n", - "\n", - "Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!unzip -q ../datasets/Training100.zip -d ../datasets/\n", - "!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore Data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import csv\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import os, shutil\n", - "import pprint\n", - "import pandas as pd\n", - "from mpl_toolkits.mplot3d.axes3d import Axes3D\n", - "\n", - "%matplotlib inline" - ] - }, - { - 
"cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "xlsx_file_path = os.path.join(data_root_path, \"Training100_Location.xlsx\")\n", - "\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "label_file_path = os.path.join(data_root_path, \"train_loc.csv\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"loc_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")\n", - "\n", - "img_save_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ASOCT_NameLeft_LabelX1Y1Right_LabelX2Y2
0T0056-10.jpg1228.833656466.95960111870.803864451.592300
1T0047-06.jpg1207.935545525.93876411792.231404432.521881
2T0066-15.jpg0239.372633476.27392501899.775568501.007410
3T0025-15.jpg0177.708404545.65593501862.380363439.228928
4T0088-06.jpg0285.256170735.07601401884.122651767.858589
\n", - "
" - ], - "text/plain": [ - " ASOCT_Name Left_Label X1 Y1 Right_Label X2 \\\n", - "0 T0056-10.jpg 1 228.833656 466.959601 1 1870.803864 \n", - "1 T0047-06.jpg 1 207.935545 525.938764 1 1792.231404 \n", - "2 T0066-15.jpg 0 239.372633 476.273925 0 1899.775568 \n", - "3 T0025-15.jpg 0 177.708404 545.655935 0 1862.380363 \n", - "4 T0088-06.jpg 0 285.256170 735.076014 0 1884.122651 \n", - "\n", - " Y2 \n", - "0 451.592300 \n", - "1 432.521881 \n", - "2 501.007410 \n", - "3 439.228928 \n", - "4 767.858589 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xlsx_file = pd.read_excel(xlsx_file_path)\n", - "xlsx_file.to_csv(label_file_path, \n", - " index=False, columns=['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'])\n", - "xlsx_file.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'],\n", - " ['T0056-10.jpg',\n", - " '228.83365553922314',\n", - " '466.95960107867666',\n", - " '1870.8038638045307',\n", - " '451.59230045548907']]\n" - ] - } - ], - "source": [ - "data_list = []\n", - "\n", - "with open(label_file_path,'r') as f: \n", - " lines=csv.reader(f) \n", - " for key, line in enumerate(lines): \n", - " data_list.append(line)\n", - " \n", - "pprint.pprint(data_list[:2])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# visualization\n", - "plt.figure(figsize=(15,5))\n", - "\n", - "file_name, l_x, l_y, r_x, r_y = data_list[1]\n", - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "\n", - "plt.imshow(img)\n", - "plt.scatter(float(l_x), float(l_y), c='r')\n", - "plt.scatter(float(r_x), float(r_y), c='r')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Center Split\n", - "\n", - "Assume images.shape = nrows, ncols, channel\n", - "\n", - "Especially 998, 2130, 3 for AGE dataset.\n", - "\n", - "Every image is cropped to two images\n", - "\n", - "- Left: [:, :nrow, :]\n", - "- Right: [:, -nrow:, :]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def center_split(img, l_point, r_point):\n", - " # img: 3D nparray\n", - " # l_point/r_point: (x,y)\n", - " nrow, ncol, ch = img.shape\n", - " left_img, right_img = img[:,:nrow,:], img[:,-nrow:,:]\n", - " \n", - "# l_point = l_point\n", - " r_point = (r_point[0] - (ncol-nrow), r_point[1])\n", - " return left_img, right_img, l_point, r_point" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "left_img, right_img, l_point, r_point = \\\n", - " center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - "\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.imshow(left_img)\n", - "plt.scatter(*l_point, c='r')\n", - "plt.subplot(1,2,2)\n", - "plt.imshow(right_img)\n", - "plt.scatter(*r_point, c='r')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare heatmap data for FCN model" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, s=10):\n", - " \"\"\" Generate a full Heap Map for every landmarks in an array\n", - " Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "\n", - "plt.figure(figsize=(10,5))\n", - "plt.subplot(1,3,1)\n", - "plt.imshow(left_img)\n", - "plt.scatter(*l_point, c='r')\n", - "plt.subplot(1,3,2)\n", - "\n", - "ratio = 256.0 / img.shape[0]\n", - "img = cv2.resize(img, (256, 256))\n", - "left_hm = generate_hm(img.shape[0], img.shape[0], (l_point[0] * ratio, l_point[1] * ratio))\n", - "plt.imshow(left_hm)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# 3D view of Heatmap\n", - "fig = plt.figure()\n", - "ax = Axes3D(fig)\n", - "\n", - "# 生成数据\n", - "X = np.arange(0, 256, 1)\n", - "Y = np.arange(0, 256, 1)\n", - "X, Y = np.meshgrid(X, Y)\n", - "\n", - "ax.plot_surface(X, Y, left_hm)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train/Val split" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):\n", - " testee_list = list(set( [line[0].split(\"-\")[0] for line in data_list[1:]] ))\n", - " \n", - " val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)\n", - "\n", - " train_list = []\n", - " val_list = []\n", - " \n", - " for line in data_list[1:]:\n", - " file_name = line[0]\n", - " if file_name.split(\"-\")[0] in val_testee_idx:\n", - " val_list.append(line)\n", - " else:\n", - " train_list.append(line)\n", - " \n", - " return train_list, val_list" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1296\n", - "304\n" - ] - } - ], - "source": [ - "train_data_list, val_data_list = train_val_split(data_list)\n", - "print(len(train_data_list))\n", - "print(len(val_data_list))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Center Split & Save\n", - "\n", - "Some coordinates might be -1" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['T0032-02.jpg', '-1.0', '-1.0', '1843.6821495037216', '423.24170285359793']\n", - "['T0032-04.jpg', '-1.0', '-1.0', '1830.46875', '417.4608405707196']\n", - "['T0044-15.jpg', '-1.0', '-1.0', '1921.7379475363339', 
'842.1701745834812']\n", - "['T0032-01.jpg', '-1.0', '-1.0', '1844.5079869727044', '429.0225651364764']\n", - "['T0032-03.jpg', '-1.0', '-1.0', '1846.1596619106695', '414.98332816377166']\n", - "['T0044-14.jpg', '-1.0', '-1.0', '1911.931717476072', '850.6125930521091']\n" - ] - } - ], - "source": [ - "split_train_list = []\n", - "\n", - "if os.path.exists(img_save_path):\n", - " shutil.rmtree(img_save_path)\n", - "os.mkdir(img_save_path)\n", - "\n", - "for item in train_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " l_x, l_y, r_x, r_y = list(map(lambda x:float(x), [l_x, l_y, r_x, r_y]))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " \n", - " if l_x != -1 and l_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " split_train_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " else:\n", - " print(item)\n", - " if r_x != -1 and r_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - " split_train_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])\n", - " else:\n", - " print(item)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['T0035-16.jpg', '-1.0', '-1.0', '1828.555184528656', '455.6486820725847']\n" - ] - } - ], - "source": [ - "split_val_list = []\n", - "for item in val_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " l_x, l_y, r_x, r_y = list(map(lambda x:float(x), [l_x, l_y, r_x, r_y]))\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " if l_x != -1 and l_y != 
-1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " split_val_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " else:\n", - " print(item)\n", - " if r_x != -1 and r_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - " split_val_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])\n", - " else:\n", - " print(item)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "with open(train_file_path, \"w+\") as f:\n", - " for item in split_train_list:\n", - " f.write(\"{},{},{}\\n\".format(*item))\n", - "\n", - "with open(val_file_path, \"w+\") as f:\n", - " for item in split_val_list:\n", - " f.write(\"{},{},{}\\n\".format(*item))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/2. Train.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationFCN/2. Train.ipynb deleted file mode 100644 index 3c0f21f716a71294a37cbb07080fdd79e9baf28e..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationFCN/2. 
Train.ipynb +++ /dev/null @@ -1,501 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (FCN)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training\n", - "\n", - "- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`\n", - "- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download ImageNet weight" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2019-08-06 14:16:30-- https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar\n", - "Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 111.206.47.194, 202.106.5.21\n", - "Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|111.206.47.194|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 87470080 (83M) [application/x-tar]\n", - "Saving to: ‘../weights/ResNet34_pretrained.tar’\n", - "\n", - "ResNet34_pretrained 100%[===================>] 83.42M 2.08MB/s in 43s \n", - "\n", - "2019-08-06 14:17:14 (1.93 MB/s) - ‘../weights/ResNet34_pretrained.tar’ saved [87470080/87470080]\n", - "\n" - ] - } - ], - "source": [ - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "!rm ../weights/ResNet34_pretrained.tar \n", - "!rm -rf ../weights/ResNet34_pretrained\n", - "\n", - "!wget -P ../weights/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar \n", - "!tar xvf ../weights/ResNet34_pretrained.tar -C ../weights/ > /dev/null # silent\n", - "!rm ../weights/ResNet34_pretrained/fc*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Main Code" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# preprocess: extract left/right label col in Training100_Location.xlsx\n", - "# save to train_csv file\n", - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"loc_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 32\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove last global pooling and fullyconnect layer to enable FCN arch.\n", - "# Standard ResNet Implement: \n", - "# https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py\n", - "from resnet_modified import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Data Loader" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def vflip_image(image):\n", - " return cv2.flip(image, flipCode=1)\n", - "\n", - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, s=10):\n", - " \"\"\" 
Generate a full Heap Map for every landmarks in an array\n", - " Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " file_name, p_x, p_y = line.split(\",\")\n", - " batch_data.append([file_name, float(p_x), float(p_y)])\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data, mode, rotate=True, flip=True):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " file, p_x, p_y = sample\n", - " \n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255.0\n", - " \n", - " ratio = 256.0 / img.shape[0]\n", - " p_x, p_y = p_x * ratio, p_y * ratio\n", - " img = cv2.resize(img, (256, 256))\n", - "\n", - " if mode == 'train':\n", - " img = img + np.random.randn(*img.shape) * 0.3 / 255 \n", - " if flip and np.random.randint(0,2):\n", - " img = vflip_image(img)\n", - " p_x = 256 - p_x\n", - " else:\n", - " pass\n", - " \n", - " hm = generate_hm(256, 256, (p_x, p_y))\n", - " img = img.transpose((2, 0, 1))\n", - " batch_data.append((img, hm))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False, mode='train'):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " mapper = 
functools.partial(process_batch_data, mode=mode)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "with open(train_file_path) as flist:\n", - " train_file_list = [os.path.join(image_path,line.strip()) for line in flist]\n", - "\n", - "with open(val_file_path) as flist:\n", - " val_file_list = [os.path.join(image_path,line.strip()) for line in flist] " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2586\n", - "607\n", - "../datasets/Training100/ASOCT_Image_loc/T0056-10_left.jpg,228.83365553922314,466.95960107867666\n" - ] - } - ], - "source": [ - "print(len(train_file_list))\n", - "print(len(val_file_list))\n", - "print(train_file_list[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.shuffle(train_file_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataloader = data_loader(train_file_list, image_path, BATCH_SIZE, False, mode='train')\n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True, mode='val')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define model (compute graph)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 256, 256]\n", - " \n", - " model = ResNet34()\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " hm = FL.data(name='label', shape=data_shape[1:], dtype='float32')\n", - " \n", - " logit = model.net(input_feature, class_dim=1)\n", - " 
pred_hm = FL.squeeze(\n", - " FL.conv2d_transpose(logit, num_filters=1, output_size=256), axes=[1]) # Bs, 256,256\n", - " \n", - " reader = fluid.io.PyReader(feed_list=[input_feature, hm], \n", - " capacity=64, iterable=True, use_double_buffer=True)\n", - "\n", - " cost = FL.square_error_cost(pred_hm, hm)\n", - " loss = FL.mean(cost)\n", - " \n", - " return [loss, pred_hm, reader]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def calc_dist(pred_hm, hm):\n", - " hm = np.array(hm)\n", - " \n", - " mean_dis = 0.\n", - " for single_hm, single_pred_hm in zip(hm, pred_hm):\n", - " # Find argmax_x, argmax_y from 2D tensor\n", - " label_x, label_y = np.unravel_index(single_hm.argmax(), single_hm.shape)\n", - " pred_x, pred_y = np.unravel_index(single_pred_hm.argmax(), single_pred_hm.shape)\n", - " mean_dis += np.sqrt((pred_x - label_x) ** 2 + (pred_y - label_y) ** 2)\n", - " \n", - " return mean_dis / hm.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def train(use_cuda, params_dirname_prefix, pretrained_model=False, EPOCH_NUM=10):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " train_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - "\n", - " with fluid.program_guard(train_prog, startup_prog):\n", - " # fluid.unique_name.guard() to share parameters with test network\n", - " with fluid.unique_name.guard():\n", - " train_loss, train_output, train_reader = network()\n", - " \n", - " optimizer = fluid.optimizer.Adam(learning_rate=1e-4)\n", - " optimizer.minimize(train_loss)\n", - " \n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_loss, val_output, val_reader = network()\n", - "\n", - " 
val_prog = val_prog.clone(for_test=True)\n", - "\n", - " train_loss.persistable = True\n", - " val_loss.persistable = True\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def if_exist(var):\n", - " return os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=train_prog, predicate=if_exist)\n", - "\n", - " train_reader.decorate_sample_list_generator( train_dataloader, places=place )\n", - " val_reader.decorate_sample_list_generator( val_dataloader, places=place )\n", - "\n", - " # For training test cost\n", - " def train_test(val_prog, val_reader):\n", - " count = 0\n", - " accumulated = [0,0]\n", - " \n", - " prediction = []\n", - " label_values = []\n", - " \n", - " for tid, val_data in enumerate(val_reader()):\n", - " avg_cost_np = exe.run(\n", - " program=val_prog,\n", - " feed=val_data,\n", - " fetch_list=[val_loss, val_output],\n", - " use_program_cache=True)\n", - " accumulated = [\n", - " x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)\n", - " ]\n", - " prediction.append(avg_cost_np[1])\n", - " label_values.append( np.array(val_data[0]['label']) )\n", - " count += 1\n", - "\n", - " prediction = np.concatenate(prediction, 0)\n", - " label_values = np.concatenate(label_values, 0)\n", - "\n", - " mean_dis = calc_dist(prediction, label_values)\n", - " \n", - " return [x / count for x in accumulated], mean_dis\n", - "\n", - " # main train loop.\n", - " def train_loop():\n", - " step = 0\n", - " best_dist = 65536.\n", - "\n", - " for pass_id in range(EPOCH_NUM):\n", - " data_load_time = time.time()\n", - " for step_id, data_train in enumerate(train_reader()):\n", - " data_load_costtime = time.time() - data_load_time\n", - " start_time = time.time()\n", - " avg_loss_value = exe.run(\n", - " train_prog,\n", - " feed=data_train,\n", - " fetch_list=[train_loss, 
train_output], \n", - " use_program_cache=True)\n", - " cost_time = time.time() - start_time\n", - " if step_id % 50 == 0:\n", - " mean_dis = calc_dist(avg_loss_value[1], data_train[0]['label'])\n", - " print(\"Pass %d, Epoch %d, Cost %f, EuDis %f, Time %f, LoadTime %f\" % (\n", - " step_id, pass_id, avg_loss_value[0], mean_dis, cost_time, data_load_costtime))\n", - " else:\n", - " pass\n", - " step += 1\n", - " data_load_time = time.time()\n", - "\n", - " avg_cost_test, avg_dist_test = train_test(val_prog, val_reader)\n", - "\n", - " print('Test with Epoch {0}, Loss {1:2.4}, EuDis {2:2.4}'.format(\n", - " pass_id, avg_cost_test[0], avg_dist_test))\n", - "\n", - " if avg_dist_test < best_dist:\n", - " best_dist = avg_dist_test\n", - " print(\"\\nBest Dis, Checkpoint Saved!\\n\")\n", - " if not os.path.isdir(params_dirname_prefix+\"_best/\"):\n", - " os.makedirs(params_dirname_prefix+\"_best/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_best/\", main_program=train_prog)\n", - "\n", - " if not os.path.isdir(params_dirname_prefix+\"_checkpoint/\"):\n", - " os.makedirs(params_dirname_prefix+\"_checkpoint/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_checkpoint/\", main_program=train_prog)\n", - " train_loop()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# download imagenet pretrain weight from:\n", - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "# remove ResNet34_pretrained/fc*\n", - "train(use_cuda=True, params_dirname_prefix=\"../weights/loc_fcn\", \n", - " pretrained_model=\"../weights/ResNet34_pretrained\", EPOCH_NUM=40)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", 
- "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/3. Inference and Result Visualization.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationFCN/3. Inference and Result Visualization.ipynb deleted file mode 100644 index b77403509bac3353034a663c98aa366939b1ab32..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationFCN/3. Inference and Result Visualization.ipynb +++ /dev/null @@ -1,538 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (FCN)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Result Visualization\n", - "\n", - "- output_file do not contain coordinates==-1 images\n", - "- Due to the preprocessing step\n", - "- For real inference, keep all images in val_data_list." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time\n", - "import matplotlib.pyplot as plt\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 32\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# preprocess: extract left/right label col in Training100_Location.xlsx\n", - "# save to train_csv file\n", - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")\n", - "origin_data_dir = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "\n", - "# !!!For real inference, keep all images in val_file_path file!!!\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")\n", - "output_file = \"./Localization_Results.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove last global pooling and fullyconnect layer to enable FCN arch.\n", - "# Standard ResNet Implement: \n", - "# https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py\n", - "from resnet_modified import *" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, s=10):\n", - " \"\"\" Generate a full Heap Map for every landmarks in an array\n", - " 
Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " file_name, p_x, p_y = line.split(\",\")\n", - " batch_data.append([file_name, float(p_x), float(p_y)])\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " file, p_x, p_y = sample\n", - " \n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255.0\n", - " \n", - " ratio = 256.0 / img.shape[0]\n", - " img = cv2.resize(img, (256, 256))\n", - " \n", - " hm = generate_hm(256, 256, (p_x * ratio, p_y * ratio))\n", - "\n", - " img = img.transpose((2, 0, 1))\n", - " batch_data.append((img, file, (p_x, p_y)))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " mapper = functools.partial(process_batch_data)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)\n", - " \n", - "with open(val_file_path) as flist:\n", - " val_file_list = [os.path.join(image_path,line.strip()) for line in 
flist] \n", - "\n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 256, 256]\n", - " \n", - " model = ResNet34()\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " \n", - " logit = model.net(input_feature, class_dim=1)\n", - " pred_hm = FL.squeeze(\n", - " FL.conv2d_transpose(logit, num_filters=1, output_size=256), axes=[1]) # Bs, 256,256\n", - "\n", - " return pred_hm" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "def calc_dist(pred_hm, points, ratio=1.0):\n", - " dis_arr = []\n", - " pred_idx = []\n", - " for point, single_pred_hm in zip(points, pred_hm):\n", - " label_y, label_x = point\n", - " pred_x, pred_y = np.unravel_index(single_pred_hm.argmax(), single_pred_hm.shape)\n", - " pred_x, pred_y = pred_x * ratio, pred_y * ratio\n", - " dis_arr.append( np.sqrt((pred_x - label_x) ** 2 + (pred_y - label_y) ** 2))\n", - " pred_idx.append((pred_y, pred_x))\n", - " \n", - " return np.array(dis_arr), pred_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def infer(use_cuda, pretrained_model):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - " \n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_output = network()\n", - "\n", - " val_prog = val_prog.clone(for_test=True)\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def if_exist(var):\n", - " 
return os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=val_prog, predicate=if_exist)\n", - "\n", - " file_names, points, pred_hms = [],[],[]\n", - "\n", - " for tid, val_data in enumerate(val_dataloader()):\n", - " imgs = []\n", - " for item in val_data:\n", - " imgs.append(item[0])\n", - " file_names.append(item[1])\n", - " points.append(item[2])\n", - " \n", - " batch_pred_hm, = exe.run(\n", - " program=val_prog,\n", - " feed={\"pixel\":np.array(imgs)},\n", - " fetch_list=[val_output],\n", - " use_program_cache=True)\n", - " pred_hms.append(batch_pred_hm)\n", - "\n", - " pred_hms = np.concatenate(pred_hms, 0)\n", - "\n", - " return file_names, np.array(points), pred_hms" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(607, 256, 256)\n", - "(607, 2)\n", - "(607,)\n" - ] - } - ], - "source": [ - "file_names, points, pred_hms = \\\n", - " infer(use_cuda=True, pretrained_model=\"../weights/loc_fcn_best\")\n", - "\n", - "print(pred_hms.shape)\n", - "print(points.shape)\n", - "\n", - "ratio = 998. / 256. 
# all cropped images have same shape\n", - "dis, pred_idx = calc_dist(pred_hms, points, ratio)\n", - "print(dis.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Minimum Distance: 0.70, idx: 400\n", - "Maximum Distance: 66.02, idx: 588\n", - "Average Distance: 19.66\n" - ] - } - ], - "source": [ - "print(\"Minimum Distance: {:.2f}, idx: {}\".format(dis.min(), dis.argmin()))\n", - "print(\"Maximum Distance: {:.2f}, idx: {}\".format(dis.max(), dis.argmax()))\n", - "print(\"Average Distance: {:.2f}\".format(dis.mean()))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0, 0.5, 'Number')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAUSUlEQVR4nO3de5BkZXnH8e8D4gVBAVlgXVwHDUqIF4T1ilG8ExPFEAxRiyKGuFYKLbSoimi8xJI/oCy8xUTdCEKqEOMFUPCCiLdELWBBLruwBFzXcpFltSLhUhHC8uSP8w4cZ6d7emfm9GXe76eqa06fPt3n6Z7e33S/e57zRmYiSarHTqMuQJI0XAa/JFXG4Jekyhj8klQZg1+SKvOwURcwiL333junpqZGXYYkTZSrrrrqN5m5bOb6iQj+qakp1q5dO+oyJGmiRMQvZlvvUI8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMga/JFVmIjp3a3f00SewadNWAKam9uH8888ccUWSJpnBPwE2bdrKihUXleXXjLgaSZPOoR5JqozBL0mVMfglqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVcbgl6TKdBb8EfGEiPheRNwQEesj4qSyfq+IuDQibi4/9+yqBknS9rr8xH8/cHJmHgw8DzgxIg4GTgEuy8wDgcvKdUnSkHQW/Jl5W2ZeXZbvAm4EVgBHAeeUzc4BXtdVDZKk7Q1ljD8ipoBnAZcD+2bmbeWmLcC+w6hBktTofAauiNgN+Arwjsy8MyIevC0zMyKyx/1WA6sBVq5c2XWZVXAKR0nQ8Sf+iNiFJvTPzczzy+rbI2J5uX05sHW2+2bmmsxclZmrli1b1mWZ1ZiewnHFiose/AMgqT5dHtUTwJnAjZn5kdZNXwOOL8vHA1/tqgZJ0va6HOo5HDgOuD4irinr3gOcBnwxIk4AfgH8ZYc1SJJm6Cz4M/M/gehx88u62q8kqT87dyWpMga/JFXG4Jekyhj8klSZzhu4tL2uG6ls
1JLUj8E/AtONVM3yaybu8SVNNod6JKkyBr8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMga/JFXG4Jekyhj8klQZg1+SKmPwS1JlDH5JqozBL0mVMfglqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMwS9JlTH4JakyBr8kVeZhoy5gqTn66BPYtGkrAFNT+3D++WeOtJ6NG3/GoYe+piz/ghUrtt+mXfOWLZvZb7/9gfGoX9LiM/gX2aZNW1mx4qKy/JoRVwP33ceD9WzYcPCs27Rr3rDhYA47bHzql7T4HOqRpMoY/JJUGYNfkipj8EtSZToL/og4KyK2RsS61rp/jIhbI+Kacnl1V/uXJM2uy0/8ZwNHzrL+o5l5SLl8o8P9S5Jm0VnwZ+YPgf/u6vElSfMzijH+t0XEdWUoaM8R7F+SqjbsBq5PAR8Csvw8A/ib2TaMiNXAaoCVK1cOq76ha3fW2in7kHHrgJaWkqEGf2bePr0cEf8KXNxn2zXAGoBVq1Zl99WNRruz1k7Zh4xbB7S0lAx1qCcilreu/jmwrte2kqRudPaJPyLOA44A9o6IzcAHgCMi4hCaoZ5NwFu72r8kaXadBX9mvmGW1Q7UStKI2bkrSZUx+CWpMga/JFXG4Jekyhj8klQZg1+SKmPwS1JlDH5JqozBL0mVMfglqTJzBn9E7BwRG4ZRjCSpe3MGf2ZuA26KiKV7UnxJqsigJ2nbE1gfEVcA90yvzMzXdlKVJKkzgwb/+zqtQsB4z8Y1yIxY7W0G3a6L5+nsXVJ/AwV/Zv4gIp4IHJiZ34mIXYGduy2tPuM8G9cgM2K1txl0uy6ep7N3Sf0NdFRPRLwF+DLwmbJqBXBhV0VJkroz6OGcJwKHA3cCZObNwD5dFSVJ6s6gwX9vZt43fSUiHkYzfaIkacIMGvw/iIj3AI+KiFcAXwIumuM+kqQxNGjwnwL8GrieZoL0bwDv7aooSVJ3Bj2q54GIOAe4nGaI56bMdKhHkibQQMEfEX8KfBr4GRDAARHx1sz8ZpfFSZIW36ANXGcAL8nMWwAi4snA1wGDX5ImzKDBf9d06Bcbgbs6qEdFu4t348ZfsGLF9tt03aHaq4b2+i1bNrPffvv3rbMrduhK89M3+CPi6LK4NiK+AXyRZoz/9cCVHddWtXYX74YNB8+6Tdcdqr1qmLn+sMP619kVO3Sl+ZnrE3/7X9PtwIvL8q+BR3VSkSSpU32DPzPfPKxCJEnDMehRPQcAbwem2vfxtMySNHkG/c/dC4Ezabp1H+iuHElS1wYN/t9l5ic6rUSSNBSDBv/HI+IDwLeBe6dXZubVnVQlSerMoMH/dOA44KU8NNST5bokaYIMGvyvB57UPjVzzQadYrCLRqdBGruG+TiD7mOQ599+XXe0pmE8H2mpGDT41wF7AFvn2rAGg04x2EWj0yCNXcN8nB3Zx1zPv/267mhNw3g+0lIxaPDvAWyIiCv5/TF+D+eUpAkzaPB/oNMqJElDM+j5+H/QdSGSpOEYtHP3Lh6aY/fhwC7APZn5mK4KkyR1Y9BP/LtPL0dEAEcBz+uqKElSdwadc/dB2bgQeFW/7SLirIjYGhHrWuv2iohLI+Lm8nPPedQsSVqAgYI/Io5uXY6JiNOA381xt7OBI2esOwW4LDMPBC4r1yVJQzToUT3tA9XvBzbRDPf0lJk/jIipGauPAo4oy+cA3wfeNWANkqRFMOgY/2Kdl3/fzLytLG8B9u21YUSsBlYDrFy5cpF2P38L6SrV4vH3IC3cXFMvvr/PzZmZH5rvjjMzIyL73L4GWAOwatWqntsNy0K6SrV4/D1ICzfXGP89s1wATmB+QzS3R8RygPLTU0BI0pDNNfXiGdPLEbE7cBLwZuALwBm97tfH14DjgdPKz6/O4zEkSQsw51E95RDMU4HraP5QHJqZ78rMvp/WI+I84CfAUyNic0ScQBP4r4iIm4GXl+uSpCGaa4z/
w8DRNGPtT8/Muwd94Mx8Q4+bXjZ4eZKkxTbXJ/6TgccD7wV+FRF3lstdEXFn9+VJkhbbXGP8O9zZK0kabwa7JFVm0M5d9eG0f91arNe33fzVa7pMqQYG/yJw2r9uLdbr227+6jVdplQDh3okqTIGvyRVxuCXpMoY/JJUGYNfkipj8EtSZQx+SaqMx/H3MUmzPdlEJmlQBn8fkzTbk01kkgblUI8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMtU2cDkN33iw41gavmqD32n4xoMdx9LwOdQjSZUx+CWpMga/JFXG4Jekyhj8klQZg1+SKmPwS1Jlqj2Ov5dJmm5R89duHFtIA5+NgJpEBv8MkzTdouav3Ti2kAY+GwE1iRzqkaTKGPySVBmDX5IqY/BLUmUMfkmqzEiO6omITcBdwDbg/sxcNYo6JKlGozyc8yWZ+ZsR7l+SquRQjyRVZlSf+BP4dkQk8JnMXDNzg4hYDawGWLlyZafFOP3f0rXUfrftTuEtWzaz3377b7dsB7HmMqrgf2Fm3hoR+wCXRsSGzPxhe4Pyx2ANwKpVq7LLYpz+b+laar/bmZ3lhx22/bIdxJrLSIZ6MvPW8nMrcAHwnFHUIUk1GnrwR8SjI2L36WXglcC6YdchSbUaxVDPvsAFETG9/89n5rdGUIckVWnowZ+ZG4FnDnu/kqSGh3NKUmUMfkmqjMEvSZVxBi5Vr93k1aspaiENUu2mq/ncX1psBr+qN7PJa7amqIU0SLWbruZzf2mxOdQjSZUx+CWpMga/JFXG4Jekyhj8klQZg1+SKmPwS1JlDH5JqsySb+DqNVXdUpiGT6Mx7tM5tutrdwm3/y0Msn7YxqWOGiz54O83VZ00H+M+nWO7vnaXcPvfwiDrh21c6qiBQz2SVBmDX5IqY/BLUmUMfkmqjMEvSZUx+CWpMga/JFVmyR/HLw1Lrykc+zV5TWLTUq+myB1tFmvfF37/derVhNbFc5iU130xGfzSIuk3hWMvk9i01Kspckebxdr3nb4+rVcTWhfPYVJe98XkUI8kVcbgl6TKGPySVBmDX5IqY/BLUmUMfkmqjMEvSZXxOH5pyAaZwWuxZo7rta8dnUWsXU+v7RdrX/PR6/Xq1WC2kMcfpCGt174GaRxrb7PQunsx+KUhG2QGr8WaOa7XvnZ0FrGZ9XS5r/no93rN1mC2kMcfpCGt174GaRxrb7PQuntxqEeSKmPwS1JlDH5JqozBL0mVMfglqTIjCf6IODIiboqIWyLilFHUIEm1GnrwR8TOwD8DfwIcDLwhIro5xkuStJ1RfOJ/DnBLZm7MzPuALwBHjaAOSapSZOZwdxhxDHBkZv5tuX4c8NzMfNuM7VYDq8vVpwI37cBu9gZ+swjlDtuk1g3WPgqTWjdMbu2TVvcTM3PZzJVj27mbmWuANfO5b0SszcxVi1xS5ya1brD2UZjUumFya5/UumcaxVDPrcATWtf3L+skSUMwiuC/EjgwIg6IiIcDfwV8bQR1SFKVhj7Uk5n3R8TbgEuAnYGzMnP9Iu9mXkNEY2BS6wZrH4VJrRsmt/ZJrfv3DP0/dyVJo2XnriRVxuCXpMosqeCfpFNBRMRZEbE1Ita11u0VEZdGxM3l556jrLGXiHhCRHwvIm6IiPURcVJZP9b1R8QjI+KKiLi21P3Bsv6AiLi8vG/+vRx0MHYiYueI+GlEXFyuT0rdmyLi+oi4JiLWlnVj/V6ZFhF7RMSXI2JDRNwYEc+flNr7WTLBP4GngjgbOHLGulOAyzLzQOCycn0c3Q+cnJkHA88DTiyv9bjXfy/w0sx8JnAIcGREPA84HfhoZv4B8FvghBHW2M9JwI2t65NSN8BLMvOQ1jHw4/5emfZx4FuZeRDwTJrXf1Jq7y0zl8QFeD5wSev6u4F3j7quOWqeAta1rt8ELC/Ly4GbRl3jgM/jq8ArJql+YFfgauC5NJ2YD5vtfTQuF5p+l8uAlwIX
AzEJdZfaNgF7z1g39u8V4LHAzykHwUxS7XNdlswnfmAF8MvW9c1l3STZNzNvK8tbgH1HWcwgImIKeBZwORNQfxkuuQbYClwK/Ay4IzPvL5uM6/vmY8DfAw+U649jMuoGSODbEXFVORULTMB7BTgA+DXwuTLE9tmIeDSTUXtfSyn4l5RsPk6M9bG2EbEb8BXgHZl5Z/u2ca0/M7dl5iE0n6CfAxw04pLmFBF/BmzNzKtGXcs8vTAzD6UZhj0xIl7UvnFc3ys0fU6HAp/KzGcB9zBjWGeMa+9rKQX/UjgVxO0RsRyg/Nw64np6iohdaEL/3Mw8v6yemPoz8w7gezRDJHtExHQz4zi+bw4HXhsRm2jOZvtSmrHnca8bgMy8tfzcClxA8wd3Et4rm4HNmXl5uf5lmj8Ek1B7X0sp+JfCqSC+Bhxflo+nGTsfOxERwJnAjZn5kdZNY11/RCyLiD3K8qNo/l/iRpo/AMeUzcau7sx8d2bun5lTNO/r72bmmxjzugEi4tERsfv0MvBKYB1j/l4ByMwtwC8j4qll1cuAG5iA2uc06v9kWMwL8Grgv2jGbf9h1PXMUet5wG3A/9F8sjiBZtz2MuBm4DvAXqOus0ftL6T5ensdcE25vHrc6weeAfy01L0OeH9Z/yTgCuAW4EvAI0Zda5/ncARw8aTUXWq8tlzWT/+7HPf3Sqv+Q4C15T1zIbDnpNTe7+IpGySpMktpqEeSNACDX5IqY/BLUmUMfkmqjMEvSZUx+DVyEbGtnLlx+jKvk16Vs0DuXZZ/3GObsyPimNluWyyt57O+nAn05IjYqdy2KiI+0ee+UxHxxi7rk4Y+9aI0i//N5jQKiyYzX7CYj7eDHnw+EbEP8HngMcAHMnMtzXHhvUwBbyz3kTrhJ36NrRmf4FdFxPfL8m4R8blyjvfrIuIvZrnv3eVnRMQno5mn4TvAPq1tDouIH5STh13SasN/S0RcWT6tfyUidi3rz46IT0TEjyNi4yDfHLI5TcFq4G2lliNa59N/cetbzk9Lh+tpwB+Xde8s3wD+IyKuLpcXlPseERHfb50r/tzSUU1EPLvUeG008w/sXk5O9+HyvK6LiLfO/zejiTfqDjIvXoBtPNQBfA1wbFm/iXI6X2AV8P2yfDrwsdb995xl+7vLz6NpzsK5M/B44A6a0xzsAvwYWFa2OxY4qyw/rvXYpwJvL8tn03TI7kQz58MtPZ7P3bOsu4PmLI5H8FDn7UXA4WV5N5pv4A/eXtbvCjyyLB8IrC3LRwD/Q3OOnp2An9B0VD8c2Ag8u2z3mPK4q4H3lnWPoPnWccCof/deRnNxqEfjYEeHel5Oc84aADLzt322fRFwXmZuA34VEd8t658KPA24tHxQ3pnmFBoAT4uIU4E9aAL5ktbjXZiZDwA3RMRCT8f7I+AjEXEucH5mbi61tO0CfDIiDqH5A/mU1m1XZOZmgGhONT1F88fgtsy8EiDLWVMj4pXAM1rfUh5L84fk5wt8DppABr/G2f08NBz5yEV+7ADWZ+bzZ7ntbOB1mXltRPw1zafraffOeIy5dxTxJJrQ3gr84fT6zDwtIr5Oc56jH0XEq2a5+zuB22lmf9oJ+F2PWrbR/99z0HxzuaTPNqqEY/waZ5uAw8pyexz/UuDE6SvRf87THwLHljHu5cBLyvqbgGUR8fzyGLtExB+V23YHbovm1NNvWsgTiIhlwKeBT2ZmzrjtyZl5fWaeTnN22YOAu8r+pz2W5hP8A8BxNN9M+rkJWB4Rzy772D2aUzdfAvxdeU5ExFPK2TJVIYNf4+BRMw7nPK2s/yDw8Wgm6N7W2v5UYM+IWBcR1/JQmM/mApqzKN4A/BvNWDiZeR/NWP/p5TGuAaaPBHofzYxiPwI2LOD5rKc5e+O3y3OZ6R3lOVxHc5bWb9KcBXJb+Y/ZdwL/AhxfajyIZjKQnsrzOhb4p3KfS2m+LX22vAZXR8Q64DP4jb9anp1TkirjJ35JqozBL0mV
MfglqTIGvyRVxuCXpMoY/JJUGYNfkirz/2AcGVnIevdJAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "_ = plt.hist(dis, bins=100, facecolor=\"blue\", edgecolor=\"black\", alpha=0.7)\n", - "plt.xlabel(\"Euclidean Distance\")\n", - "plt.ylabel(\"Number\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Good Case\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "print(\"Good Case\")\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "idx = dis.argmin()\n", - "\n", - "img = cv2.imread(file_names[idx])[:,:,::-1]\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.title(\"Loc Result\")\n", - "plt.imshow(img)\n", - "plt.scatter(*points[idx], c='r', s=150)\n", - "plt.scatter(*pred_idx[idx], c='b', s=120, marker='*')\n", - "\n", - "plt.subplot(1,2,2)\n", - "plt.title(\"Pred Heatmap\")\n", - "plt.imshow(pred_hms[idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bad Case\n", - "../datasets/Training100/ASOCT_Image_loc/T0035-10_left.jpg\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "print(\"Bad Case\")\n", - "\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "idx = dis.argmax()\n", - "print(file_names[idx])\n", - "\n", - "img = cv2.imread(file_names[idx])[:,:,::-1]\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.title(\"Loc Result\")\n", - "plt.imshow(img)\n", - "plt.scatter(*points[idx], c='r', s=150)\n", - "plt.scatter(*pred_idx[idx], c='b', s=120, marker='*')\n", - "\n", - "plt.subplot(1,2,2)\n", - "plt.title(\"Pred Heatmap\")\n", - "plt.imshow(pred_hms[idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "result = {}\n", - "for file, pred_id in zip(file_names, pred_idx):\n", - " # T0055-15_right.jpg -> T0055-15\n", - " ori_name = file.split(\"/\")[-1].split(\"_\")[0]\n", - " if ori_name not in result.keys():\n", - " result[ori_name] = [[-1, -1],[-1, -1]]\n", - "\n", - " if \"left\" in file: result[ori_name][0] = pred_id\n", - " else: \n", - " image = cv2.imread(os.path.join(origin_data_dir, ori_name+\".jpg\"))\n", - " nrows, ncols, ch = image.shape\n", - " result[ori_name][1] = [(ncols - nrows) + pred_id[0], pred_id[1]]" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# output_file do not contain coordinates==-1 images\n", - "# Due to the preprocessing step\n", - "# For real inference, keep all images in val_data_list.\n", - "\n", - "with open(output_file, \"w+\") as f:\n", - " f.write(\"{},{},{},{},{}\\n\".format(\"ASOCT_NAME\", \"X_LEFT\", \"Y_LEFT\", \"X_RIGHT\", \"Y_RIGHT\"))\n", - " for file, pred_point in result.items():\n", - " f.write(\"{},{},{},{},{}\\n\".format(file+\".jpg\", *pred_point[0], *pred_point[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 
3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/README.md b/PaddleCV/Research/AGEchallenge/LocalizationFCN/README.md deleted file mode 100644 index 49a6c00cb7d3c45a5b284122a7f305b58ca986ae..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationFCN/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Angle closure Glaucoma Evaluation Challenge -The goal of the challenge is to evaluate and compare automated algorithms for angle closure classification and localization of scleral spur (SS) points on a common dataset of AS-OCT images. We invite the medical image analysis community to participate by developing and testing existing and novel automated classification and segmentation methods. -More detail [AGE challenge](https://age.grand-challenge.org/Details/). - -## Scleral spur localization task (FCN model) - -1. Method - - * Inspired by Fully Convolutional Networks (FCN), a keypoint is equivalent to 2D gaussian heatmap. - - - - - * Then, a localization task could be transformed to a heatmap regression task. - -2. Prepare data - - * We assume that you have downloaded data(two zip files), and store @ `../datasets/`. - * (Updated on August 5) Replace update files. - * We provide a demo about `zip file extract`, `data structure explore`, and `Train/Val split`. - -3. Train - - * We assume that you have download data, extract compressed files, and store @ `../datasets/`. 
- * Based on PaddlePaddle and [ResNet34](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py), we modify the model structure to enable FCN model, which global pooling layer and final fc layer were removed. - -4. Inference - - * We assume that you have download data, extract compressed files, and store @ `../datasets/`. - * We assume that you stored checkpoint files @ `../weights/loc_fcn` - * We provide a baseline about `inference` and `visualization`. - - - - \ No newline at end of file diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/1.png b/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/1.png deleted file mode 100644 index be44b9eea1b002f79e86396013a530cca3d4bf88..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/1.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/2.png b/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/2.png deleted file mode 100644 index b1a90b26998f9f4540bd15d6a1faaa597d094b7d..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/2.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/3.png b/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/3.png deleted file mode 100644 index 9871349db8f9f32c25b6dfd392836fcd34d39306..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/3.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/4.png b/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/4.png deleted file mode 100644 index 197113efd65cf9375d84e5aeadbfe7baad14d1d9..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationFCN/assets/4.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationFCN/resnet_modified.py 
b/PaddleCV/Research/AGEchallenge/LocalizationFCN/resnet_modified.py deleted file mode 100644 index 7319464d42046c8fd048c43bfa5e881ee1d39b43..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationFCN/resnet_modified.py +++ /dev/null @@ -1,199 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - 
depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu',name="conv1") - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name="res"+str(block+2)+"a" - else: - conv_name="res"+str(block+2)+"b"+str(i) - else: - conv_name="res"+str(block+2)+chr(97+i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, name=conv_name) - out = conv -# pool = fluid.layers.pool2d( -# input=conv, pool_size=7, pool_type='avg', global_pooling=True) -# stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) -# out = fluid.layers.fc(input=pool, -# size=class_dim, -# param_attr=fluid.param_attr.ParamAttr( -# initializer=fluid.initializer.Uniform(-stdv, stdv))) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name="res"+str(block+2)+chr(97+i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block==i==0, - name=conv_name) - -# pool = fluid.layers.pool2d( -# input=conv, pool_size=7, pool_type='avg', global_pooling=True) -# stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) -# out = fluid.layers.fc(input=pool, -# size=class_dim, -# param_attr=fluid.param_attr.ParamAttr( -# initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = conv - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + 
'.conv2d.output.1') - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - name=bn_name+'.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance',) - - def shortcut(self, input, ch_out, stride, is_first, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name+"_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") - - short = self.shortcut(input, num_filters * 4, stride, is_first=False, name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name): - conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, - name=name+"_branch2a") - conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, act=None, - name=name+"_branch2b") - short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18(): - model = ResNet(layers=18) - return model - - -def ResNet34(): - model = ResNet(layers=34) - return model - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def 
ResNet152(): - model = ResNet(layers=152) - return model diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/1. PrepareData.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/1. PrepareData.ipynb deleted file mode 100644 index 23acd427512f639f13a33bb3fa655c86c1953629..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/1. PrepareData.ipynb +++ /dev/null @@ -1,448 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (RCNN)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## requirement install" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: http://pip.baidu.com/root/baidu/+simple/\n", - "Requirement already satisfied: xlrd in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (1.2.0)\n", - "Looking in indexes: http://pip.baidu.com/root/baidu/+simple/\n", - "Requirement already satisfied: tqdm in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (4.32.1)\n", - "Looking in indexes: http://pip.baidu.com/root/baidu/+simple/\n", - "Requirement already satisfied: pycocotools in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages/pycocotools-2.0-py3.7-linux-x86_64.egg (2.0)\n", - "Requirement already satisfied: setuptools>=18.0 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from pycocotools) (41.0.1)\n", - "Requirement already satisfied: cython>=0.27.3 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from pycocotools) (0.29.12)\n", - "Requirement already 
satisfied: matplotlib>=2.1.0 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from pycocotools) (3.1.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools) (1.1.0)\n", - "Requirement already satisfied: cycler>=0.10 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools) (0.10.0)\n", - "Requirement already satisfied: numpy>=1.11 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools) (1.16.4)\n", - "Requirement already satisfied: python-dateutil>=2.1 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools) (2.8.0)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from matplotlib>=2.1.0->pycocotools) (2.4.1.1)\n", - "Requirement already satisfied: six in /home/aiib-mia/anaconda3/envs/age/lib/python3.7/site-packages (from cycler>=0.10->matplotlib>=2.1.0->pycocotools) (1.12.0)\n" - ] - } - ], - "source": [ - "!pip install xlrd\n", - "!pip install tqdm\n", - "!pip install pycocotools" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Zip File Extract\n", - "\n", - "Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unzip: cannot find or open ../datasets/Training100.zip, ../datasets/Training100.zip.zip or ../datasets/Training100.zip.ZIP.\n", - "unzip: cannot find or open ../datasets/Validation_ASOCT_Image.zip, ../datasets/Validation_ASOCT_Image.zip.zip or ../datasets/Validation_ASOCT_Image.zip.ZIP.\n" - ] - } - ], - "source": [ - "!unzip -q ../datasets/Training100.zip -d 
../datasets/\n", - "!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore Data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import csv\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import os, shutil\n", - "import pprint\n", - "import coco_parser\n", - "import json\n", - "import pandas as pd\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "xlsx_file_path = os.path.join(data_root_path, \"Training100_Location.xlsx\")\n", - "\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "label_file_path = os.path.join(data_root_path, \"train_loc.csv\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"train2017\")\n", - "val_file_path = os.path.join(data_root_path, \"val2017\")\n", - "\n", - "json_path = os.path.join(data_root_path, \"annotations\")\n", - "train_json_path = os.path.join(json_path, \"instances_train2017.json\")\n", - "val_json_path = os.path.join(json_path, \"instances_val2017.json\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "xlsx_file = pd.read_excel(xlsx_file_path)\n", - "xlsx_file.to_csv(label_file_path, \n", - " index=False, columns=['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'])" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'],\n", - " ['T0056-10.jpg',\n", - " '228.83365553922314',\n", - " '466.95960107867666',\n", - " '1870.8038638045307',\n", - " '451.59230045548907']]\n" - ] - } - ], - "source": [ - "data_list = []\n", - "\n", - "with 
open(label_file_path,'r') as f: \n", - " lines=csv.reader(f) \n", - " for key, line in enumerate(lines): \n", - " data_list.append(line)\n", - " \n", - "pprint.pprint(data_list[:2])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.figure(figsize=(8, 5))\n", - "\n", - "file_name, l_x, l_y, r_x, r_y = data_list[1]\n", - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "\n", - "binary_mask = np.zeros((img.shape[0], img.shape[1]))\n", - "l_x, l_y = int(float(l_x)), int(float(l_y))\n", - "binary_mask[l_y - 100 : l_y + 100, l_x - 100 : l_x + 100] = 1\n", - "\n", - "plt.figure(figsize=(8,8))\n", - "\n", - "plt.subplot(2,1,1)\n", - "plt.imshow(img)\n", - "plt.scatter(float(l_x), float(l_y), c='r')\n", - "plt.scatter(float(r_x), float(r_y), c='r')\n", - "\n", - "plt.subplot(2,1,2)\n", - "plt.imshow(binary_mask)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Train/Val split" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):\n", - " testee_list = list(set( [line[0].split(\"-\")[0] for line in data_list[1:]] ))\n", - " \n", - " val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)\n", - "\n", - " train_list = []\n", - " val_list = []\n", - " \n", - " for line in data_list[1:]:\n", - " file_name = line[0]\n", - " if file_name.split(\"-\")[0] in val_testee_idx:\n", - " val_list.append(line)\n", - " else:\n", - " train_list.append(line)\n", - " \n", - " return train_list, val_list" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1296\n", - "304\n" - ] - } - ], - "source": [ - "train_data_list, val_data_list = train_val_split(data_list)\n", - "print(len(train_data_list))\n", - "print(len(val_data_list))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "def center_split(img, l_point, 
r_point):\n", - " # img: 3D nparray\n", - " # l_point/r_point: (x,y)\n", - " nrow, ncol, ch = img.shape\n", - " left_img, right_img = img[:,:nrow,:], img[:,-nrow:,:]\n", - " \n", - "# l_point = l_point\n", - " r_point = (r_point[0] - (ncol-nrow), r_point[1])\n", - " return left_img, right_img, l_point, r_point" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "split_train_list = []\n", - "\n", - "if os.path.exists(train_file_path):\n", - " shutil.rmtree(train_file_path)\n", - "os.mkdir(train_file_path)\n", - "\n", - "for item in train_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " cv2.imwrite(os.path.join( train_file_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " cv2.imwrite(os.path.join( train_file_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - "\n", - " split_train_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " split_train_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.figure(figsize=(15,5))\n", - "\n", - "file_name, l_x, l_y = split_train_list[1]\n", - "img = cv2.imread(os.path.join(train_file_path, file_name))[:,:,::-1]\n", - "\n", - "binary_mask = np.zeros((img.shape[0], img.shape[1]))\n", - "l_x, l_y = int(float(l_x)), int(float(l_y))\n", - "binary_mask[l_y - 100 : l_y + 100, l_x - 100 : l_x + 100] = 1\n", - "\n", - "plt.figure(figsize=(8,8))\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.imshow(img)\n", - "plt.scatter(float(l_x), float(l_y), c='r')\n", - "\n", - "plt.subplot(1,2,2)\n", - "plt.imshow(binary_mask)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "split_val_list = []\n", - "\n", - "if os.path.exists(val_file_path):\n", - " shutil.rmtree(val_file_path)\n", - "os.mkdir(val_file_path)\n", - "\n", - "for item in val_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " cv2.imwrite(os.path.join( val_file_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " cv2.imwrite(os.path.join( val_file_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - "\n", - " split_val_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " split_val_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove -1 in get_coco_dict()\n", - "coco_train_dict = coco_parser.get_coco_dict(train_file_path, split_train_list, box_range=100)\n", - "coco_val_dict = coco_parser.get_coco_dict(val_file_path, split_val_list, box_range=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - 
"metadata": {}, - "outputs": [], - "source": [ - "if not os.path.exists(json_path):\n", - " os.mkdir(json_path)\n", - "\n", - "with open(train_json_path, 'w+') as output_json_file:\n", - " json.dump(coco_train_dict, output_json_file)\n", - "\n", - "with open(val_json_path, 'w+') as output_json_file:\n", - " json.dump(coco_val_dict, output_json_file)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/2. Train.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/2. Train.ipynb deleted file mode 100644 index 22e542c6038ed4bbadaa2050f8d96faf0c9ca787..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/2. Train.ipynb +++ /dev/null @@ -1,144 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (RCNN)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "- For real inference, you MIGHT keep all images in val_file_path file." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training\n", - "\n", - "- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download PaddleDetection\n", - " https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection\n", - " \n", - "- To use Origin PaddleDetection for AGE loc task :\n", - "- Replace `PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml` with `./cascade_rcnn_r50_fpn_1x.yml`\n", - "- Or, you might edit configs/cascade_rcnn_r50_fpn_1x.yml\n", - "\n", - "```\n", - "max_iters: 12960\n", - "snapshot_iter: 2000\n", - "LearningRate:\n", - " milestones: [6000, 8000]\n", - "```\n", - "\n", - "for more details, see [PaddleDetection Docs](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection/docs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Custom dataset (coco type)\n", - "\n", - "- coco type json files and folder architecture was constructed in pervious cell.\n", - "- Under data10461/Training100/, you need these three folders:\n", - "\n", - "```\n", - "annotations\n", - "\tinstances_train2017.json\n", - "\tinstances_val2017.json\n", - "train2017\n", - "\t***.jpg\n", - "val2017\n", - "\t***.jpg\n", - "```\n", - "\n", - "for more details, see [Data.md](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/docs/DATA.md), [Data.md中文版](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/PaddleDetection/docs/DATA_cn.md)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/aiib-mia/Desktop/shangfangxin/AGE_challenge Baseline/LocalizationRCNN/PaddleDetection\n" - ] - } - ], - "source": [ - "!rm -rf ./PaddleDetection/dataset/coco\n", - "# you might replace this path to absolute path\n", 
- "!ln -sf ../../../datasets/Training100/ ./PaddleDetection/dataset/coco\n", - "%cd ./PaddleDetection" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: PYTHONPATH=./\n", - "./\r\n" - ] - } - ], - "source": [ - "%set_env PYTHONPATH=./\n", - "!echo $PYTHONPATH" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# too many lines of training log, set print frequence to per 1000 steps. 12960 steps in total\n", - "!python tools/train.py -c configs/cascade_rcnn_r50_fpn_1x.yml -o log_iter=1000" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/3. Inference.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/3. Inference.ipynb deleted file mode 100644 index 943d9b1d2096799c8dbcb046a2bcabce96cb9eee..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/3. Inference.ipynb +++ /dev/null @@ -1,554 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (RCNN)\n", - "\n", - "- During distance evaluating, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append('PaddleDetection')" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mdocstring_parser is not installed, argument description is not available\u001b[0m\n" - ] - } - ], - "source": [ - "import os\n", - "import glob\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import cv2\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from paddle import fluid\n", - "\n", - "from ppdet.core.workspace import load_config, create\n", - "from ppdet.modeling.model_input import create_feed\n", - "from ppdet.data.data_feed import create_reader\n", - "from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info\n", - "\n", - "from ppdet.utils.eval_utils import parse_fetches\n", - "import ppdet.utils.checkpoint as checkpoint\n", - "\n", - "from ppdet.utils.colormap import colormap\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "FORMAT = '%(asctime)s-%(levelname)s: %(message)s'\n", - "logging.basicConfig(level=logging.INFO, format=FORMAT)\n", - "logger = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "config_path = \"./PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml\"\n", - "weight_path = \"./PaddleDetection/output/cascade_rcnn_r50_fpn_1x/model_final\"\n", - "infer_dir = \"./PaddleDetection/dataset/coco/val2017\"\n", - "origin_data_dir = \"./PaddleDetection/dataset/coco/ASOCT_Image\"\n", - "anno_file = \"./PaddleDetection/dataset/coco/annotations/instances_val2017.json\"\n", - "output_file = \"./Localization_Results.csv\"\n", - "\n", - "data_root_path = 
\"../datasets/Training100/\"\n", - "label_file_path = os.path.join(data_root_path, \"train_loc.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def get_test_images(infer_dir, infer_img):\n", - " \"\"\"\n", - " Get image path list in TEST mode\n", - " \"\"\"\n", - " assert infer_img is not None or infer_dir is not None, \\\n", - " \"--infer_img or --infer_dir should be set\"\n", - " assert infer_img is None or os.path.isfile(infer_img), \\\n", - " \"{} is not a file\".format(infer_img)\n", - " assert infer_dir is None or os.path.isdir(infer_dir), \\\n", - " \"{} is not a directory\".format(infer_dir)\n", - " images = []\n", - "\n", - " # infer_img has a higher priority\n", - " if infer_img and os.path.isfile(infer_img):\n", - " images.append(infer_img)\n", - " return images\n", - "\n", - " infer_dir = os.path.abspath(infer_dir)\n", - " assert os.path.isdir(infer_dir), \\\n", - " \"infer_dir {} is not a directory\".format(infer_dir)\n", - " exts = ['jpg', 'jpeg', 'png', 'bmp']\n", - " exts += [ext.upper() for ext in exts]\n", - " for ext in exts:\n", - " images.extend(glob.glob('{}/*.{}'.format(infer_dir, ext)))\n", - "\n", - " assert len(images) > 0, \"no image found in {}\".format(infer_dir)\n", - " print(\"Found {} inference images in total.\".format(len(images)))\n", - "\n", - " return images" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "cfg = load_config(config_path)\n", - "\n", - "if 'architecture' in cfg:\n", - " main_arch = cfg.architecture\n", - "else:\n", - " raise ValueError(\"'architecture' not specified in config file.\")\n", - "\n", - "if 'test_feed' not in cfg:\n", - " test_feed = create(main_arch + 'TestFeed')\n", - "else:\n", - " test_feed = create(cfg.test_feed)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "Found 608 inference images in total.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-08-07 15:30:53,437-INFO: Loading checkpoint from ./PaddleDetection/output/cascade_rcnn_r50_fpn_1x/model_final...\n" - ] - } - ], - "source": [ - "test_images = get_test_images(infer_dir, infer_img=None)\n", - "test_feed.dataset.add_images(test_images)\n", - "\n", - "place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()\n", - "exe = fluid.Executor(place)\n", - "\n", - "model = create(main_arch)\n", - "\n", - "startup_prog = fluid.Program()\n", - "infer_prog = fluid.Program()\n", - "with fluid.program_guard(infer_prog, startup_prog):\n", - " with fluid.unique_name.guard():\n", - " _, feed_vars = create_feed(test_feed, use_pyreader=False)\n", - " test_fetches = model.test(feed_vars)\n", - "infer_prog = infer_prog.clone(True)\n", - "\n", - "reader = create_reader(test_feed)\n", - "feeder = fluid.DataFeeder(place=place, feed_list=feed_vars.values())\n", - "\n", - "exe.run(startup_prog)\n", - "checkpoint.load_checkpoint(exe, infer_prog, weight_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2019-08-07 15:30:53,593-INFO: Load categories from ./PaddleDetection/dataset/coco/annotations/instances_val2017.json\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loading annotations into memory...\n", - "Done (t=0.00s)\n", - "creating index...\n", - "index created!\n" - ] - } - ], - "source": [ - "# parse infer fetches\n", - "extra_keys = ['im_info', 'im_id', 'im_shape']\n", - "\n", - "keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)\n", - "\n", - "# anno_file = getattr(test_feed.dataset, 'annotation', None)\n", - "with_background = getattr(test_feed, 'with_background', True)\n", - "use_default_label = getattr(test_feed, 'use_default_label', False)\n", - "clsid2catid, catid2name = 
get_category_info(anno_file, with_background,\n", - " use_default_label)\n", - "\n", - "# whether output bbox is normalized in model output layer\n", - "is_bbox_normalized = False\n", - "if hasattr(model, 'is_bbox_normalized') and \\\n", - " callable(model.is_bbox_normalized):\n", - " is_bbox_normalized = model.is_bbox_normalized()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def draw_bbox(image, im_id, catid2name, bboxes, threshold, is_bbox_normalized=False):\n", - " catid2color = {}\n", - " color_list = colormap(rgb=True)[:40]\n", - "\n", - " for dt in np.array(bboxes):\n", - " if im_id != dt['image_id']:\n", - " continue\n", - " catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']\n", - " if score < threshold:\n", - " continue\n", - "\n", - " xmin, ymin, w, h = bbox\n", - " if is_bbox_normalized:\n", - " im_width, im_height = image.size\n", - " xmin *= im_width\n", - " ymin *= im_height\n", - " w *= im_width\n", - " h *= im_height\n", - " xmax = xmin + w\n", - " ymax = ymin + h\n", - "\n", - " if catid not in catid2color:\n", - " idx = np.random.randint(len(color_list))\n", - " catid2color[catid] = color_list[idx]\n", - " color = tuple(map(lambda x:int(x*255), catid2color[catid]))\n", - " xmin, ymin, xmax, ymax = int(xmin),int(ymin), int(xmax),int(ymax)\n", - "\n", - " # draw bbox\n", - " cv2.rectangle(image, (xmin,ymin), (xmax,ymax), color,10)\n", - " # draw label\n", - " text = \"{} {:.2f}\".format(catid2name[catid], score)\n", - " cv2.putText(image,text,(xmin, ymin-10), cv2.FONT_HERSHEY_SIMPLEX, 2,color,5, cv2.LINE_AA)\n", - " return image\n", - " \n", - "def calc_box(image, im_id, bboxes, threshold, is_bbox_normalized=False):\n", - " best_box = []\n", - " best_score = 0.\n", - " for dt in np.array(bboxes):\n", - " if im_id != dt['image_id']:\n", - " continue\n", - " catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']\n", - " if score < threshold:\n", - " continue\n", - 
"\n", - " xmin, ymin, w, h = bbox\n", - " if is_bbox_normalized:\n", - " im_width, im_height = image.size\n", - " xmin *= im_width\n", - " ymin *= im_height\n", - " w *= im_width\n", - " h *= im_height\n", - " xmax = xmin + w\n", - " ymax = ymin + h\n", - "\n", - " xmin, ymin, xmax, ymax = int(xmin),int(ymin), int(xmax),int(ymax)\n", - " \n", - " if best_score < score:\n", - " best_score = score\n", - " best_box = (xmin, ymin, xmax, ymax)\n", - "\n", - " return best_box" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Save File" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "imid2path = reader.imid2path\n", - "result = {}\n", - "for iter_id, data in enumerate(reader()):\n", - " outs = exe.run(infer_prog,\n", - " feed=feeder.feed(data),\n", - " fetch_list=values,\n", - " return_numpy=False)\n", - " res = {\n", - " k: (np.array(v), v.recursive_sequence_lengths())\n", - " for k, v in zip(keys, outs)\n", - " }\n", - " \n", - " bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)\n", - "\n", - " im_ids = res['im_id'][0]\n", - " for im_id in im_ids:\n", - " image_path = imid2path[int(im_id)]\n", - " image = cv2.imread(image_path)[:,:,::-1]\n", - "\n", - " box = calc_box(image, int(im_id), bbox_results, 0.1, is_bbox_normalized)\n", - "\n", - " file_name = image_path.split(\"/\")[-1].split(\"_\")[0]\n", - " point = ((box[2] + box[0]) / 2, (box[3] + box[1]) / 2)\n", - " \n", - " if file_name not in result.keys():\n", - " result[file_name] = [[-1, -1], [-1, -1]]\n", - " if \"left\" in image_path: result[file_name][0] = point\n", - " else: \n", - " image = cv2.imread(os.path.join(origin_data_dir, file_name+\".jpg\"))\n", - " nrows, ncols, ch = image.shape\n", - " result[file_name][1] = [(ncols - nrows) + point[0], point[1]]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "with open(output_file, \"w+\") 
as f:\n", - " f.write(\"{},{},{},{},{}\\n\".format(\"ASOCT_NAME\", \"X_LEFT\", \"Y_LEFT\", \"X_RIGHT\", \"Y_RIGHT\"))\n", - " for file, pred_point in result.items():\n", - " f.write(\"{},{},{},{},{}\\n\".format(file+\".jpg\", *pred_point[0], *pred_point[1]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Result Visualization" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "imid2path = reader.imid2path\n", - "plt.figure(figsize=(10, 7))\n", - "for iter_id, data in enumerate(reader()):\n", - " outs = exe.run(infer_prog,\n", - " feed=feeder.feed(data),\n", - " fetch_list=values,\n", - " return_numpy=False)\n", - " res = {\n", - " k: (np.array(v), v.recursive_sequence_lengths())\n", - " for k, v in zip(keys, outs)\n", - " }\n", - "\n", - " bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)\n", - "\n", - " # visualize result\n", - " im_ids = res['im_id'][0]\n", - " image_path = imid2path[int(im_ids[0])]\n", - " image = cv2.imread(image_path)[:,:,::-1]\n", - " image = draw_bbox(image.copy(), int(im_ids[0]), \n", - " catid2name, bbox_results, 0.5, is_bbox_normalized)\n", - " plt.subplot(2,3,iter_id+1)\n", - " plt.imshow(image)\n", - " if iter_id > 4: break" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Distance Evaluating" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "result_csv = pd.read_csv(output_file).merge(\n", - " pd.read_csv(label_file_path), left_on=\"ASOCT_NAME\", right_on=\"ASOCT_Name\", \n", - " how='left'\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "result_csv[\"x1_dis\"] = result_csv.apply(\n", - " func=lambda x:np.sqrt((x['X1']-x['X_LEFT'])**2 + (x['Y1']-x['Y_LEFT'])**2),\n", - " axis=1\n", - " )\n", - "result_csv.loc[result_csv['X1'] == -1, 'x1_dis'] = 0\n", - "result_csv[\"x2_dis\"] = result_csv.apply(\n", - " func=lambda x:np.sqrt((x['X2']-x['X_RIGHT'])**2 + (x['Y2']-x['Y_RIGHT'])**2),\n", - " axis=1\n", - " )\n", - "result_csv.loc[result_csv['X2'] == -1, 'x2_dis'] = 0" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(608,)" - ] - }, - 
"execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dis = np.concatenate([result_csv['x1_dis'].values, result_csv['x2_dis'].values], axis=0)\n", - "dis.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Minimum Distance: 0.00, idx: 160\n", - "Maximum Distance: 97.52, idx: 33\n", - "Average Distance: 14.98\n" - ] - } - ], - "source": [ - "print(\"Minimum Distance: {:.2f}, idx: {}\".format(dis.min(), dis.argmin()))\n", - "print(\"Maximum Distance: {:.2f}, idx: {}\".format(dis.max(), dis.argmax()))\n", - "print(\"Average Distance: {:.2f}\".format(dis.mean()))" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0, 0.5, 'Number')" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.figure(figsize=(10,5))\n", - "_ = plt.hist(dis, bins=100, facecolor=\"blue\", edgecolor=\"black\", alpha=0.7)\n", - "plt.xlabel(\"Euclidean Distance\")\n", - "plt.ylabel(\"Number\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/README.md b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/README.md deleted file mode 100644 index 164dbb204e441ddc499fa09aec54bb2033deb243..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# Angle closure Glaucoma Evaluation Challenge -The goal of the challenge is to evaluate and compare automated algorithms for angle closure classification and localization of scleral spur (SS) points on a common dataset of AS-OCT images. We invite the medical image analysis community to participate by developing and testing existing and novel automated classification and segmentation methods. -More detail [AGE challenge](https://age.grand-challenge.org/Details/). - -## Scleral spur localization task (RCNN model) - -1. Method - - * A localization task could be transformed to a object detection task. - - - - * Then, a image could be splited into 2 parts, the right part: - - - -2. Prepare data - - * We assume that you have downloaded data(two zip files), and stored @ `../datasets/`. 
- * (Updated on August 5) Replace update files. - * We provide a demo about `zip file extract`, `data structure explore`, `format data to coco type` and `Train/Val split`. - -3. Train - - * We assume that you have downloaded data, extracted compressed files, and stored @ `../datasets/`. - * You should download or clone [PaddleModels](https://github.com/PaddlePaddle/models) manually. - * After you download whole repo, use soft link command: - - ``` - $ git clone https://github.com/PaddlePaddle/models - $ cd models/PaddleCV/ - $ ln -sf ./PaddleDetection Research/AGEchallenge/LocalizationRCNN/PaddleDetection - ``` - - * Based on [PaddleDetection](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/PaddleDetection), we provide a guideline to modify config file. - * You could also simply replace `PaddleDetection/configs/cascade_rcnn_r50_fpn_1x.yml` with `./cascade_rcnn_r50_fpn_1x.yml` - * `PaddleDetection/tools/train.py` will automatically download pretrain weights, default save to `~/.cache/paddle/weights/`. - * We provide a demo to call `PaddleDetection/tools/train.py`, which starts training, save checkpoint to `PaddleDetection/output/cascade_rcnn_r50_fpn_1x/` - -4. Inference - - * We assume that you have downloaded data, extracted compressed files, and stored @ `../datasets/`. - * We assume that you stored checkpoint files @ `PaddleDetection/output/cascade_rcnn_r50_fpn_1x/` - * We provide a demo about `inference` and `visualization`. 
- - - - \ No newline at end of file diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/1.png b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/1.png deleted file mode 100644 index 948218cc75da676bdead3a0beb93bc7ff393c92c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/1.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/2.png b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/2.png deleted file mode 100644 index 416857777d978f5830c732cde4a60cda0cdd757c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/2.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/3.png b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/3.png deleted file mode 100644 index cde07a203488dbc83432b13b6d5e8a682696d8c6..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/3.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/4.png b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/4.png deleted file mode 100644 index 3837010a0c9e6da0e6375fe7c98b5ca9bb27e0ef..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/assets/4.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/cascade_rcnn_r50_fpn_1x.yml b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/cascade_rcnn_r50_fpn_1x.yml deleted file mode 100644 index 017729014e9bd39ba247d5ea95f65bd0bd9b27e8..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/cascade_rcnn_r50_fpn_1x.yml +++ /dev/null @@ -1,137 +0,0 @@ -architecture: CascadeRCNN -train_feed: FasterRCNNTrainFeed -eval_feed: FasterRCNNEvalFeed -test_feed: FasterRCNNTestFeed -max_iters: 12960 -snapshot_iter: 2000 -use_gpu: true 
-log_smooth_window: 20 -save_dir: output -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar -weights: output/cascade_rcnn_r50_fpn_1x/model_final -metric: COCO -num_classes: 2 - -CascadeRCNN: - backbone: ResNet - fpn: FPN - rpn_head: FPNRPNHead - roi_extractor: FPNRoIAlign - bbox_head: CascadeBBoxHead - bbox_assigner: CascadeBBoxAssigner - -ResNet: - norm_type: affine_channel - depth: 50 - feature_maps: [2, 3, 4, 5] - freeze_at: 2 - variant: b - -FPN: - min_level: 2 - max_level: 6 - num_chan: 256 - spatial_scale: [0.03125, 0.0625, 0.125, 0.25] - -FPNRPNHead: - anchor_generator: - anchor_sizes: [32, 64, 128, 256, 512] - aspect_ratios: [0.5, 1.0, 2.0] - stride: [16.0, 16.0] - variance: [1.0, 1.0, 1.0, 1.0] - anchor_start_size: 32 - min_level: 2 - max_level: 6 - num_chan: 256 - rpn_target_assign: - rpn_batch_size_per_im: 256 - rpn_fg_fraction: 0.5 - rpn_positive_overlap: 0.7 - rpn_negative_overlap: 0.3 - rpn_straddle_thresh: 0.0 - train_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 2000 - post_nms_top_n: 2000 - test_proposal: - min_size: 0.0 - nms_thresh: 0.7 - pre_nms_top_n: 1000 - post_nms_top_n: 1000 - -FPNRoIAlign: - canconical_level: 4 - canonical_size: 224 - min_level: 2 - max_level: 5 - box_resolution: 7 - sampling_ratio: 2 - -CascadeBBoxAssigner: - batch_size_per_im: 512 - bbox_reg_weights: [10, 20, 30] - bg_thresh_lo: [0.0, 0.0, 0.0] - bg_thresh_hi: [0.5, 0.6, 0.7] - fg_thresh: [0.5, 0.6, 0.7] - fg_fraction: 0.25 - -CascadeBBoxHead: - head: FC6FC7Head - nms: - keep_top_k: 100 - nms_threshold: 0.5 - score_threshold: 0.05 - -FC6FC7Head: - num_chan: 1024 - -LearningRate: - base_lr: 0.02 - schedulers: - - !PiecewiseDecay - gamma: 0.1 - milestones: [6000, 8000] - - !LinearWarmup - start_factor: 0.3333333333333333 - steps: 500 - -OptimizerBuilder: - optimizer: - momentum: 0.9 - type: Momentum - regularizer: - factor: 0.0001 - type: L2 - -FasterRCNNTrainFeed: - batch_size: 2 - dataset: - dataset_dir: 
dataset/coco - annotation: annotations/instances_train2017.json - image_dir: train2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 - -FasterRCNNEvalFeed: - batch_size: 1 - dataset: - dataset_dir: dataset/coco - annotation: annotations/instances_val2017.json - image_dir: val2017 - batch_transforms: - - !PadBatch - pad_to_stride: 32 - -FasterRCNNTestFeed: - batch_size: 1 - dataset: - annotation: dataset/coco/annotations/instances_val2017.json - batch_transforms: - - !PadBatch - pad_to_stride: 32 - drop_last: false - num_workers: 2 diff --git a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/coco_parser.py b/PaddleCV/Research/AGEchallenge/LocalizationRCNN/coco_parser.py deleted file mode 100644 index 349c1626a5cbabc2c756ecb6cfadb052b93e641f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationRCNN/coco_parser.py +++ /dev/null @@ -1,105 +0,0 @@ -# Ref: https://github.com/waspinator/pycococreator/blob/master/examples/shapes/shapes_to_coco.py -import datetime -import os -import re -import fnmatch -import cv2 -import numpy as np - -INFO = { - "description": "AGE Challenge Location", - "url": "https://age.grand-challenge.org/PaddlePaddle/", - "version": "0.1.0", - "year": 2019, - "contributor": "shangfangxin@baidu.com", - "date_created": datetime.datetime.utcnow().isoformat(' ') -} - -LICENSES = [ - { - "id": 1, - "name": "", - "url": "" - } -] - -CATEGORIES = [ - { - 'id': 1, - 'name': 'point', - 'supercategory': 'shape', - }, -] - -def create_image_info(image_id, file_name, image_size, - date_captured=datetime.datetime.utcnow().isoformat(' '), - license_id=1, coco_url="", flickr_url=""): - - image_info = { - "id": image_id, - "file_name": file_name, - "width": image_size[1], - "height": image_size[0], - "date_captured": date_captured, - "license": license_id, - "coco_url": coco_url, - "flickr_url": flickr_url - } - - return image_info - -def create_annotation_info(image, annotation_id, 
image_id, category_info, bounding_box=None): - - annotation_info = { - "id": annotation_id, - "image_id": image_id, - "category_id": category_info["id"], - "iscrowd": category_info["is_crowd"], - "area": bounding_box[2] * bounding_box[3], - "bbox": bounding_box, - "segmentation": [[]], - "width": image.shape[1], - "height": image.shape[0], - } - - return annotation_info - -def create_anno_info(image, point_x, point_y, image_id, category_info, segmentation_id, box_range): - bounding_box = [point_x - box_range, point_y - box_range, box_range*2, box_range*2] - return create_annotation_info(image, - segmentation_id, image_id, category_info, bounding_box) - - -def get_coco_dict(img_path, data_list, box_range=20): - - coco_output = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id = 1 - segmentation_id = 1 - for item in data_list: - image_filename, p_x, p_y = item - p_x, p_y = int(float(p_x)), int(float(p_y)) - image_filename = os.path.join(img_path, image_filename) - - image = cv2.imread(image_filename) - image_info = create_image_info( - image_id, os.path.basename(image_filename), image.shape) - coco_output["images"].append(image_info) - - # filter for associated png annotations - class_id = 1 - category_info = {'id': class_id, 'is_crowd': 0} - - if p_x != -1 and p_y != -1: - coco_output["annotations"].append( - create_anno_info(image, p_x, p_y, image_id, category_info, segmentation_id, box_range)) - segmentation_id = segmentation_id + 1 - - image_id = image_id + 1 - return coco_output \ No newline at end of file diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/1. PrepareData.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationUNet/1. PrepareData.ipynb deleted file mode 100644 index f1ab6c172b03b5c33726179f67caaf5568db56ca..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/1. 
PrepareData.ipynb +++ /dev/null @@ -1,652 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (ResNet50+UNet)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## requirement install" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install xlrd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Zip File Extract\n", - "\n", - "Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!unzip -q ../datasets/Training100.zip -d ../datasets/\n", - "!unzip -q ../datasets/Validation_ASOCT_Image.zip -d ../datasets/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Explore Data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import csv\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import os, shutil\n", - "import pprint\n", - "import pandas as pd\n", - "from mpl_toolkits.mplot3d.axes3d import Axes3D\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "data_root_path = \"../datasets/Training100/\"\n", - "xlsx_file_path = os.path.join(data_root_path, \"Training100_Location.xlsx\")\n", - "\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "label_file_path = os.path.join(data_root_path, 
\"train_loc.csv\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"loc_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")\n", - "\n", - "img_save_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ASOCT_NameLeft_LabelX1Y1Right_LabelX2Y2
0T0056-10.jpg1228.833656466.95960111870.803864451.592300
1T0047-06.jpg1207.935545525.93876411792.231404432.521881
2T0066-15.jpg0239.372633476.27392501899.775568501.007410
3T0025-15.jpg0177.708404545.65593501862.380363439.228928
4T0088-06.jpg0285.256170735.07601401884.122651767.858589
\n", - "
" - ], - "text/plain": [ - " ASOCT_Name Left_Label X1 Y1 Right_Label X2 \\\n", - "0 T0056-10.jpg 1 228.833656 466.959601 1 1870.803864 \n", - "1 T0047-06.jpg 1 207.935545 525.938764 1 1792.231404 \n", - "2 T0066-15.jpg 0 239.372633 476.273925 0 1899.775568 \n", - "3 T0025-15.jpg 0 177.708404 545.655935 0 1862.380363 \n", - "4 T0088-06.jpg 0 285.256170 735.076014 0 1884.122651 \n", - "\n", - " Y2 \n", - "0 451.592300 \n", - "1 432.521881 \n", - "2 501.007410 \n", - "3 439.228928 \n", - "4 767.858589 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xlsx_file = pd.read_excel(xlsx_file_path)\n", - "xlsx_file.to_csv(label_file_path, \n", - " index=False, columns=['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'])\n", - "xlsx_file.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[['ASOCT_Name', 'X1', 'Y1', 'X2', 'Y2'],\n", - " ['T0056-10.jpg',\n", - " '228.83365553922314',\n", - " '466.95960107867666',\n", - " '1870.8038638045307',\n", - " '451.59230045548907']]\n" - ] - } - ], - "source": [ - "data_list = []\n", - "\n", - "with open(label_file_path,'r') as f: \n", - " lines=csv.reader(f) \n", - " for key, line in enumerate(lines): \n", - " data_list.append(line)\n", - " \n", - "pprint.pprint(data_list[:2])" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# visualization\n", - "plt.figure(figsize=(15,5))\n", - "\n", - "file_name, l_x, l_y, r_x, r_y = data_list[1]\n", - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "\n", - "plt.imshow(img)\n", - "plt.scatter(float(l_x), float(l_y), c='r')\n", - "plt.scatter(float(r_x), float(r_y), c='r')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Center Split\n", - "\n", - "Assume images.shape = nrows, ncols, channel\n", - "\n", - "Especially 998, 2130, 3 for AGE dataset.\n", - "\n", - "Every image is cropped to two images\n", - "\n", - "- Left: [:, :nrow, :]\n", - "- Right: [:, -nrow:, :]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def center_split(img, l_point, r_point):\n", - " # img: 3D nparray\n", - " # l_point/r_point: (x,y)\n", - " nrow, ncol, ch = img.shape\n", - " left_img, right_img = img[:,:nrow,:], img[:,-nrow:,:]\n", - " \n", - "# l_point = l_point\n", - " r_point = (r_point[0] - (ncol-nrow), r_point[1])\n", - " return left_img, right_img, l_point, r_point" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "left_img, right_img, l_point, r_point = \\\n", - " center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - "\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.imshow(left_img)\n", - "plt.scatter(*l_point, c='r')\n", - "plt.subplot(1,2,2)\n", - "plt.imshow(right_img)\n", - "plt.scatter(*r_point, c='r')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare heatmap data for UNet model" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, s=10):\n", - " \"\"\" Generate a full Heap Map for every landmarks in an array\n", - " Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "img = cv2.imread(os.path.join(image_path, file_name))[:,:,::-1]\n", - "\n", - "plt.figure(figsize=(10,5))\n", - "plt.subplot(1,3,1)\n", - "plt.imshow(left_img)\n", - "plt.scatter(*l_point, c='r')\n", - "plt.subplot(1,3,2)\n", - "\n", - "ratio = 256.0 / img.shape[0]\n", - "img = cv2.resize(img, (256, 256))\n", - "left_hm = generate_hm(img.shape[0], img.shape[0], (l_point[0] * ratio, l_point[1] * ratio))\n", - "plt.imshow(left_hm)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "# 3D view of Heatmap\n", - "fig = plt.figure()\n", - "ax = Axes3D(fig)\n", - "\n", - "# 生成数据\n", - "X = np.arange(0, 256, 1)\n", - "Y = np.arange(0, 256, 1)\n", - "X, Y = np.meshgrid(X, Y)\n", - "\n", - "ax.plot_surface(X, Y, left_hm)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train/Val split" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def train_val_split(data_list, train_ratio=0.8, shuffle_seed=42):\n", - " testee_list = list(set( [line[0].split(\"-\")[0] for line in data_list[1:]] ))\n", - " \n", - " val_testee_idx = np.random.choice(testee_list, int(len(testee_list) * (1-train_ratio)), replace=False)\n", - "\n", - " train_list = []\n", - " val_list = []\n", - " \n", - " for line in data_list[1:]:\n", - " file_name = line[0]\n", - " if file_name.split(\"-\")[0] in val_testee_idx:\n", - " val_list.append(line)\n", - " else:\n", - " train_list.append(line)\n", - " \n", - " return train_list, val_list" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1296\n", - "304\n" - ] - } - ], - "source": [ - "train_data_list, val_data_list = train_val_split(data_list)\n", - "print(len(train_data_list))\n", - "print(len(val_data_list))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Center Split & Save\n", - "\n", - "Some coordinates might be -1" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['T0032-02.jpg', '-1.0', '-1.0', '1843.6821495037216', '423.24170285359793']\n", - "['T0032-04.jpg', '-1.0', '-1.0', '1830.46875', '417.4608405707196']\n", - "['T0044-15.jpg', '-1.0', '-1.0', '1921.7379475363339', 
'842.1701745834812']\n", - "['T0032-01.jpg', '-1.0', '-1.0', '1844.5079869727044', '429.0225651364764']\n", - "['T0032-03.jpg', '-1.0', '-1.0', '1846.1596619106695', '414.98332816377166']\n", - "['T0044-14.jpg', '-1.0', '-1.0', '1911.931717476072', '850.6125930521091']\n" - ] - } - ], - "source": [ - "split_train_list = []\n", - "\n", - "if os.path.exists(img_save_path):\n", - " shutil.rmtree(img_save_path)\n", - "os.mkdir(img_save_path)\n", - "\n", - "for item in train_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " l_x, l_y, r_x, r_y = list(map(lambda x:float(x), [l_x, l_y, r_x, r_y]))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " \n", - " if l_x != -1 and l_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " split_train_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " else:\n", - " print(item)\n", - " if r_x != -1 and r_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - " split_train_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])\n", - " else:\n", - " print(item)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['T0035-16.jpg', '-1.0', '-1.0', '1828.555184528656', '455.6486820725847']\n" - ] - } - ], - "source": [ - "split_val_list = []\n", - "for item in val_data_list:\n", - " file_name, l_x, l_y, r_x, r_y = item\n", - " l_x, l_y, r_x, r_y = list(map(lambda x:float(x), [l_x, l_y, r_x, r_y]))\n", - " img = cv2.imread(os.path.join(image_path, file_name))\n", - " # split\n", - " left_img, right_img, l_point, r_point = center_split(img, (float(l_x), float(l_y)), (float(r_x), float(r_y)))\n", - " if l_x != -1 and l_y != 
-1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_left.jpg'), left_img)\n", - " split_val_list.append([file_name.split(\".\")[0]+'_left.jpg', *l_point])\n", - " else:\n", - " print(item)\n", - " if r_x != -1 and r_y != -1:\n", - " cv2.imwrite(os.path.join( img_save_path, file_name.split(\".\")[0]+'_right.jpg'), right_img)\n", - " split_val_list.append([file_name.split(\".\")[0]+'_right.jpg', *r_point])\n", - " else:\n", - " print(item)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "with open(train_file_path, \"w+\") as f:\n", - " for item in split_train_list:\n", - " f.write(\"{},{},{}\\n\".format(*item))\n", - "\n", - "with open(val_file_path, \"w+\") as f:\n", - " for item in split_val_list:\n", - " f.write(\"{},{},{}\\n\".format(*item))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/2. Train.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationUNet/2. Train.ipynb deleted file mode 100644 index 133818f491735e9cd218c247a13976d1f8dd6530..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/2. 
Train.ipynb +++ /dev/null @@ -1,502 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (ResNet50+UNet)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training\n", - "\n", - "- Assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/`\n", - "- Assume `weights` are stored @ `./AGE_challenge Baseline/weights/`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download ImageNet weight" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2019-08-08 16:00:14-- https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar\n", - "Resolving paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)... 202.106.5.21, 111.206.47.194\n", - "Connecting to paddle-imagenet-models-name.bj.bcebos.com (paddle-imagenet-models-name.bj.bcebos.com)|202.106.5.21|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 102717440 (98M) [application/x-tar]\n", - "Saving to: ‘../weights/ResNet50_pretrained.tar’\n", - "\n", - "ResNet50_pretrained 100%[===================>] 97.96M 2.93MB/s in 34s \n", - "\n", - "2019-08-08 16:00:48 (2.90 MB/s) - ‘../weights/ResNet50_pretrained.tar’ saved [102717440/102717440]\n", - "\n" - ] - } - ], - "source": [ - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "!rm ../weights/ResNet50_pretrained.tar \n", - "!rm -rf ../weights/ResNet50_pretrained\n", - "\n", - "!wget -P ../weights/ https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar \n", - "!tar xvf ../weights/ResNet50_pretrained.tar -C ../weights/ > /dev/null # silent\n", - "!rm ../weights/ResNet50_pretrained/fc*" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Main Code" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# preprocess: extract left/right label col in Training100_Location.xlsx\n", - "# save to train_csv file\n", - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")\n", - "\n", - "train_file_path = os.path.join(data_root_path, \"loc_train_split.csv\")\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 8\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove last global pooling and fullyconnect layer to enable UNet arch.\n", - "# Standard ResNet Implement: \n", - "# https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py\n", - "from resnet import *\n", - "from res_unet_paddle import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define Data Loader" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def vflip_image(image):\n", - " return cv2.flip(image, flipCode=1)\n", - "\n", - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, 
s=10):\n", - " \"\"\" Generate a full Heap Map for every landmarks in an array\n", - " Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " file_name, p_x, p_y = line.split(\",\")\n", - " batch_data.append([file_name, float(p_x), float(p_y)])\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data, mode, rotate=True, flip=True):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " file, p_x, p_y = sample\n", - " \n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255.0\n", - " \n", - " ratio = 256.0 / img.shape[0]\n", - " p_x, p_y = p_x * ratio, p_y * ratio\n", - " img = cv2.resize(img, (256, 256))\n", - "\n", - " if mode == 'train':\n", - " img = img + np.random.randn(*img.shape) * 0.3 / 255 \n", - " if flip and np.random.randint(0,2):\n", - " img = vflip_image(img)\n", - " p_x = 256 - p_x\n", - " else:\n", - " pass\n", - " \n", - " hm = generate_hm(256, 256, (p_x, p_y))\n", - " img = img.transpose((2, 0, 1))\n", - " batch_data.append((img, hm))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False, mode='train'):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " 
mapper = functools.partial(process_batch_data, mode=mode)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "with open(train_file_path) as flist:\n", - " train_file_list = [os.path.join(image_path,line.strip()) for line in flist]\n", - "\n", - "with open(val_file_path) as flist:\n", - " val_file_list = [os.path.join(image_path,line.strip()) for line in flist] " - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2586\n", - "607\n", - "../datasets/Training100/ASOCT_Image_loc/T0056-10_left.jpg,228.83365553922314,466.95960107867666\n" - ] - } - ], - "source": [ - "print(len(train_file_list))\n", - "print(len(val_file_list))\n", - "print(train_file_list[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.shuffle(train_file_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataloader = data_loader(train_file_list, image_path, BATCH_SIZE, False, mode='train')\n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True, mode='val')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define model (compute graph)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 256, 256]\n", - " \n", - " model = ResUNet(\n", - " ResNet50().net, 1\n", - " )\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " hm = FL.data(name='label', shape=data_shape[1:], dtype='float32')\n", - " \n", - " logit = 
model.net(input_feature)\n", - " pred_hm = FL.squeeze(logit, axes=[1]) # Bs, 256,256\n", - "\n", - " reader = fluid.io.PyReader(feed_list=[input_feature, hm], \n", - " capacity=64, iterable=True, use_double_buffer=True)\n", - "\n", - " cost = FL.square_error_cost(pred_hm, hm)\n", - " loss = FL.mean(cost)\n", - " \n", - " return [loss, pred_hm, reader]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def calc_dist(pred_hm, hm):\n", - " hm = np.array(hm)\n", - " \n", - " mean_dis = 0.\n", - " for single_hm, single_pred_hm in zip(hm, pred_hm):\n", - " # Find argmax_x, argmax_y from 2D tensor\n", - " label_x, label_y = np.unravel_index(single_hm.argmax(), single_hm.shape)\n", - " pred_x, pred_y = np.unravel_index(single_pred_hm.argmax(), single_pred_hm.shape)\n", - " mean_dis += np.sqrt((pred_x - label_x) ** 2 + (pred_y - label_y) ** 2)\n", - " \n", - " return mean_dis / hm.shape[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "def train(use_cuda, params_dirname_prefix, pretrained_model=False, EPOCH_NUM=10):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " train_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - "\n", - " with fluid.program_guard(train_prog, startup_prog):\n", - " # fluid.unique_name.guard() to share parameters with test network\n", - " with fluid.unique_name.guard():\n", - " train_loss, train_output, train_reader = network()\n", - " \n", - " optimizer = fluid.optimizer.Adam(learning_rate=1e-4)\n", - " optimizer.minimize(train_loss)\n", - " \n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_loss, val_output, val_reader = network()\n", - "\n", - " val_prog = 
val_prog.clone(for_test=True)\n", - "\n", - " train_loss.persistable = True\n", - " val_loss.persistable = True\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def if_exist(var):\n", - " return os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=train_prog, predicate=if_exist)\n", - "\n", - " train_reader.decorate_sample_list_generator( train_dataloader, places=place )\n", - " val_reader.decorate_sample_list_generator( val_dataloader, places=place )\n", - "\n", - " # For training test cost\n", - " def train_test(val_prog, val_reader):\n", - " count = 0\n", - " accumulated = [0,0]\n", - " \n", - " prediction = []\n", - " label_values = []\n", - " \n", - " for tid, val_data in enumerate(val_reader()):\n", - " avg_cost_np = exe.run(\n", - " program=val_prog,\n", - " feed=val_data,\n", - " fetch_list=[val_loss, val_output],\n", - " use_program_cache=True)\n", - " accumulated = [\n", - " x[0] + x[1][0] for x in zip(accumulated, avg_cost_np)\n", - " ]\n", - " prediction.append(avg_cost_np[1])\n", - " label_values.append( np.array(val_data[0]['label']) )\n", - " count += 1\n", - "\n", - " prediction = np.concatenate(prediction, 0)\n", - " label_values = np.concatenate(label_values, 0)\n", - "\n", - " mean_dis = calc_dist(prediction, label_values)\n", - " \n", - " return [x / count for x in accumulated], mean_dis\n", - "\n", - " # main train loop.\n", - " def train_loop():\n", - " step = 0\n", - " best_dist = 65536.\n", - "\n", - " for pass_id in range(EPOCH_NUM):\n", - " data_load_time = time.time()\n", - " for step_id, data_train in enumerate(train_reader()):\n", - " data_load_costtime = time.time() - data_load_time\n", - " start_time = time.time()\n", - " avg_loss_value = exe.run(\n", - " train_prog,\n", - " feed=data_train,\n", - " fetch_list=[train_loss, train_output], \n", - 
" use_program_cache=True)\n", - " cost_time = time.time() - start_time\n", - " if step_id % 50 == 0:\n", - " mean_dis = calc_dist(avg_loss_value[1], data_train[0]['label'])\n", - " print(\"Pass %d, Epoch %d, Cost %f, EuDis %f, Time %f, LoadTime %f\" % (\n", - " step_id, pass_id, avg_loss_value[0], mean_dis, cost_time, data_load_costtime))\n", - " else:\n", - " pass\n", - " step += 1\n", - " data_load_time = time.time()\n", - "\n", - " avg_cost_test, avg_dist_test = train_test(val_prog, val_reader)\n", - "\n", - " print('Test with Epoch {0}, Loss {1:2.4}, EuDis {2:2.4}'.format(\n", - " pass_id, avg_cost_test[0], avg_dist_test))\n", - "\n", - " if avg_dist_test < best_dist:\n", - " best_dist = avg_dist_test\n", - " print(\"\\nBest Dis, Checkpoint Saved!\\n\")\n", - " if not os.path.isdir(params_dirname_prefix+\"_best/\"):\n", - " os.makedirs(params_dirname_prefix+\"_best/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_best/\", main_program=train_prog)\n", - "\n", - " if not os.path.isdir(params_dirname_prefix+\"_checkpoint/\"):\n", - " os.makedirs(params_dirname_prefix+\"_checkpoint/\")\n", - " fluid.io.save_persistables(exe, params_dirname_prefix+\"_checkpoint/\", main_program=train_prog)\n", - " train_loop()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# download imagenet pretrain weight from:\n", - "# https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification\n", - "train(use_cuda=True, params_dirname_prefix=\"../weights/loc_unet\", \n", - " pretrained_model=\"../weights/ResNet50_pretrained\", EPOCH_NUM=40)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": 
"ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/3. Inference and Result Visualization.ipynb b/PaddleCV/Research/AGEchallenge/LocalizationUNet/3. Inference and Result Visualization.ipynb deleted file mode 100644 index 310db838e2ee9f6817a77df74bfb73135698a8ef..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/3. Inference and Result Visualization.ipynb +++ /dev/null @@ -1,540 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [Angle closure Glaucoma Evaluation Challenge](https://age.grand-challenge.org/Details/)\n", - "## Scleral spur localization Baseline (ResNet50+UNet)\n", - "\n", - "- To keep model training stable, images with coordinate == -1, were removed.\n", - "\n", - "- For real inference, you MIGHT keep all images in val_file_path file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Result Visualization\n", - "\n", - "- output_file do not contain coordinates==-1 images\n", - "- Due to the preprocessing step\n", - "- For real inference, keep all images in val_data_list." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os, random, functools, math\n", - "import cv2\n", - "import numpy as np\n", - "import time\n", - "import matplotlib.pyplot as plt\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running Verify Fluid Program ... \n", - "Your Paddle Fluid works well on SINGLE GPU or CPU.\n", - "Your Paddle Fluid works well on MUTIPLE GPU or CPU.\n", - "Your Paddle Fluid is installed successfully! 
Let's start deep Learning with Paddle Fluid now\n" - ] - } - ], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddle.fluid.layers as FL\n", - "import paddle.fluid.optimizer as FO\n", - "fluid.install_check.run_check()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "BATCH_SIZE = 8\n", - "THREAD = 8\n", - "BUF_SIZE = 32" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# preprocess: extract left/right label col in Training100_Location.xlsx\n", - "# save to train_csv file\n", - "data_root_path = \"../datasets/Training100/\"\n", - "image_path = os.path.join(data_root_path, \"ASOCT_Image_loc\")\n", - "origin_data_dir = os.path.join(data_root_path, \"ASOCT_Image\")\n", - "\n", - "# !!!For real inference, keep all images in val_file_path file!!!\n", - "val_file_path = os.path.join(data_root_path, \"loc_val_split.csv\")\n", - "output_file = \"./Localization_Results.csv\"" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove last global pooling and fullyconnect layer to enable UNet arch.\n", - "# Standard ResNet Implement: \n", - "# https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py\n", - "from resnet import *\n", - "from res_unet_paddle import *" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def gaussian_k(x0,y0, sigma, width, height):\n", - " \"\"\" Make a square gaussian kernel centered at (x0, y0) with sigma as SD.\n", - " \"\"\"\n", - " x = np.arange(0, width, 1, float) ## (width,)\n", - " y = np.arange(0, height, 1, float)[:, np.newaxis] ## (height,1)\n", - " return np.exp(-((x-x0)**2 + (y-y0)**2) / (2*sigma**2))\n", - "\n", - "def generate_hm(height, width, point, s=10):\n", - " \"\"\" Generate a full Heap Map for every 
landmarks in an array\n", - " Args:\n", - " height : The height of Heat Map (the height of target output)\n", - " width : The width of Heat Map (the width of target output)\n", - " point : (x,y)\n", - " \"\"\"\n", - " hm = gaussian_k(point[0], point[1], s, height, width)\n", - " return hm" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def reader(img_path, file_list, batch_size=32, shuffle=True, shuffle_seed=42):\n", - " def read_file_list():\n", - " batch_data = []\n", - " np.random.shuffle(file_list)\n", - " for line in file_list:\n", - " file_name, p_x, p_y = line.split(\",\")\n", - " batch_data.append([file_name, float(p_x), float(p_y)])\n", - " if len(batch_data) == batch_size:\n", - " yield batch_data\n", - " batch_data = []\n", - " if len(batch_data) != 0:\n", - " yield batch_data\n", - " return read_file_list\n", - "\n", - "def process_batch_data(input_data):\n", - " batch_data = []\n", - " for sample in input_data:\n", - " file, p_x, p_y = sample\n", - " \n", - " img = cv2.imread( file )\n", - " img = img[:, :, ::-1].astype('float32') / 255.0\n", - " \n", - " ratio = 256.0 / img.shape[0]\n", - " img = cv2.resize(img, (256, 256))\n", - " \n", - " hm = generate_hm(256, 256, (p_x * ratio, p_y * ratio))\n", - "\n", - " img = img.transpose((2, 0, 1))\n", - " batch_data.append((img, file, (p_x, p_y)))\n", - "\n", - " return batch_data" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(img_list, img_path, batch_size, order=False):\n", - " data_reader = reader(img_path, img_list, batch_size)\n", - " mapper = functools.partial(process_batch_data)\n", - " \n", - " data_reader = paddle.reader.shuffle(data_reader, 32)\n", - " \n", - " return paddle.reader.xmap_readers(\n", - " mapper, data_reader, THREAD, BUF_SIZE, order=order)\n", - " \n", - "with open(val_file_path) as flist:\n", - " val_file_list = 
[os.path.join(image_path,line.strip()) for line in flist] \n", - "\n", - "val_dataloader = data_loader(val_file_list, image_path, BATCH_SIZE, True)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def network():\n", - " data_shape = [3, 256, 256]\n", - " \n", - " model = ResUNet(\n", - " ResNet50().net, 1\n", - " )\n", - " \n", - " input_feature = FL.data(name='pixel', shape=data_shape, dtype='float32')\n", - " \n", - " logit = model.net(input_feature)\n", - " pred_hm = FL.squeeze(logit, axes=[1]) # Bs, 256,256\n", - "\n", - " return pred_hm" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "def calc_dist(pred_hm, points, ratio=1.0):\n", - " dis_arr = []\n", - " pred_idx = []\n", - " for point, single_pred_hm in zip(points, pred_hm):\n", - " label_y, label_x = point\n", - " pred_x, pred_y = np.unravel_index(single_pred_hm.argmax(), single_pred_hm.shape)\n", - " pred_x, pred_y = pred_x * ratio, pred_y * ratio\n", - " dis_arr.append( np.sqrt((pred_x - label_x) ** 2 + (pred_y - label_y) ** 2))\n", - " pred_idx.append((pred_y, pred_x))\n", - " \n", - " return np.array(dis_arr), pred_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def infer(use_cuda, pretrained_model):\n", - " place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()\n", - " \n", - " startup_prog = fluid.Program()\n", - " val_prog = fluid.Program()\n", - " \n", - " # 定义预测网络\n", - " with fluid.program_guard(val_prog, startup_prog):\n", - " # Use fluid.unique_name.guard() to share parameters with train network\n", - " with fluid.unique_name.guard():\n", - " val_output = network()\n", - "\n", - " val_prog = val_prog.clone(for_test=True)\n", - " val_output.persistable = True\n", - " \n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_prog)\n", - "\n", - " if pretrained_model:\n", - " def 
if_exist(var):\n", - " return os.path.exists(os.path.join(pretrained_model, var.name))\n", - "\n", - " fluid.io.load_vars(\n", - " exe, pretrained_model, main_program=val_prog, predicate=if_exist)\n", - "\n", - " file_names, points, pred_hms = [],[],[]\n", - "\n", - " for tid, val_data in enumerate(val_dataloader()):\n", - " imgs = []\n", - " for item in val_data:\n", - " imgs.append(item[0])\n", - " file_names.append(item[1])\n", - " points.append(item[2])\n", - " \n", - " batch_pred_hm, = exe.run(\n", - " program=val_prog,\n", - " feed={\"pixel\":np.array(imgs)},\n", - " fetch_list=[val_output],\n", - " use_program_cache=True)\n", - " pred_hms.append(batch_pred_hm)\n", - "\n", - " pred_hms = np.concatenate(pred_hms, 0)\n", - "\n", - " return file_names, np.array(points), pred_hms" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(607, 256, 256)\n", - "(607, 2)\n", - "(607,)\n" - ] - } - ], - "source": [ - "file_names, points, pred_hms = \\\n", - " infer(use_cuda=True, pretrained_model=\"../weights/loc_unet_best\")\n", - "\n", - "print(pred_hms.shape)\n", - "print(points.shape)\n", - "\n", - "ratio = 998. / 256. 
# all cropped images have same shape\n", - "dis, pred_idx = calc_dist(pred_hms, points, ratio)\n", - "print(dis.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Minimum Distance: 0.61, idx: 42\n", - "Maximum Distance: 62.52, idx: 420\n", - "Average Distance: 14.96\n" - ] - } - ], - "source": [ - "print(\"Minimum Distance: {:.2f}, idx: {}\".format(dis.min(), dis.argmin()))\n", - "print(\"Maximum Distance: {:.2f}, idx: {}\".format(dis.max(), dis.argmax()))\n", - "print(\"Average Distance: {:.2f}\".format(dis.mean()))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0, 0.5, 'Number')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEJCAYAAACT/UyFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAVVUlEQVR4nO3de7BlZXnn8e8PxHgBBaSBTmPnoCESJolIt0SDk6BGQ5xJNMTEMRbDZJi0NQUpdKiaQcdEU/EPrAwmOiYmJBCYKmImUUQxJIjEy0RTSIMNdEMzYKeZNOmmsUaH1hrtoXnmj7WObE+fyz7dZ+3LWd9P1a6z1rvX2ut5T+9+9trveS+pKiRJ/XHEuAOQJI2WiV+SesbEL0k9Y+KXpJ4x8UtSz5j4JalnOkv8SZ6f5LNJ7kuyLcmlbfl7kjySZEv7eF1XMUiSDpau+vEnWQusraq7khwD3Am8Afhl4JtV9V86ubAkaVFP6+qFq2o3sLvd3pfkfmDdobzWCSecUDMzMysYnSStfnfeeefXqmrN3PLOEv+gJDPAS4DbgXOAS5L8a2AzcFlVfX2x82dmZti8eXPXYUrSqpLk4fnKO//jbpKjgY8Bb6uqx4EPAy8EzqT5RnDlAudtSrI5yebHHnus6zAlqTc6TfxJjqJJ+tdX1Q0AVfVoVR2oqieBPwbOnu/cqrqqqjZW1cY1aw76piJJOkRd9uoJcDVwf1W9f6B87cBhvwBs7SoGSdLBumzjPwe4ALg3yZa27J3Am5OcCRSwE3hrhzFIkuboslfP3wGZ56mbu7qmJGlpjtyVpJ4x8UtSz5j4JalnTPyS1DMjGbmr5Tv//IvYuXMvADMzJ3LDDVePOSJJq4WJf0Lt3LmXdetuard/bszRSFpNbOqRpJ4x8UtSz5j4JalnTPyS1DMmfknqGRO/JPWMiV+SesbEL0k9Y+KXpJ4x8UtSz5j4JalnTPyS1DMmfknqGRO/JPWMiV+Sesb5+FcJF26RNCwT/yrhwi2ShmVTjyT1jIlfknrGxC9JPWPil6SeMfFLUs+Y+CWpZ0z8ktQz9
uNfYcMMpHKwlaRxMvGvsGEGUjnYStI42dQjST1j4peknuks8Sd5fpLPJrkvybYkl7blxye5NcmD7c/juopBknSwLu/4nwAuq6ozgJcBFyc5A7gcuK2qTgNua/clSSPSWeKvqt1VdVe7vQ+4H1gHvB64rj3sOuANXcUgSTrYSNr4k8wALwFuB06qqt3tU3uAk0YRgySp0XniT3I08DHgbVX1+OBzVVVALXDepiSbk2x+7LHHug5Tknqj08Sf5CiapH99Vd3QFj+aZG37/Fpg73znVtVVVbWxqjauWbOmyzAlqVe67NUT4Grg/qp6/8BTnwQubLcvBD7RVQySpIN1OXL3HOAC4N4kW9qydwJXAH+R5CLgYeCXO4xBkjRHZ4m/qv4OyAJPv7qr60qSFufIXUnqGRO/JPWMiV+SesbEL0k943z8Hdqx46ucdVYz3/5KLbgyuIjLnj27OPnkU9prPcy6dYf98pJ6wMTfof37WfEFVwYXcdm+/Qw2bHhqW5KGYVOPJPWMiV+SesbEL0k9Y+KXpJ4x8UtSz5j4Jaln7M45QQb76NsvX1JXTPwTZG4ffUnqgk09ktQzJn5J6hkTvyT1jIlfknrGxC9JPWPil6SeMfFLUs/Yj3+OwUFUCy2eMnjMYsdNu2F+F5Kmj4l/jsFBVAstnjJ4zGLHTbthfheSpo9NPZLUMyZ+SeoZE78k9YyJX5J6xsQvST1j4peknjHxS1LP2I9/lXMQlqS5TPyrnIOwJM1lU48k9YyJX5J6prPEn+SaJHuTbB0oe0+SR5JsaR+v6+r6kqT5dXnHfy1w3jzlv1tVZ7aPmzu8viRpHp0l/qr6AvC/u3p9SdKhGUcb/yVJ7mmbgo4bw/UlqddG3Z3zw8BvA9X+vBL4t/MdmGQTsAlg/fr1o4rve+zY8VXOOqvpArlYH/jB43bseJh160YW4orEM9jXf8+eXZx88ilDnytp+ow08VfVo7PbSf4Y+NQix14FXAWwcePG6j66g+3fz1B94AeP2779jJHEtpjlxjPY13/79jPYsGFy6iJp5Y20qSfJ2oHdXwC2LnSsJKkbS97xJzkS2FZVpy/nhZN8BDgXOCHJLuDdwLlJzqRp6tkJvHW5AUuSDs+Sib+qDiR5IMn6qvpfw75wVb15nmInipGkMRu2jf84YFuSLwPfmi2sqp/vJCpJUmeGTfy/0WkUkqSRGSrxV9Xnk/wAcFpVfSbJs4Ajuw1NktSFoXr1JPk14KPAH7VF64AbuwpKktSdYZt6LgbOBm4HqKoHk5zYWVSr0EKDqoYZbDVpA8QkTbdhE/93qmp/EgCSPI2mS6aGtNCgqmEGW03aADFJ023YAVyfT/JO4JlJXgP8JXBTd2FJkroybOK/HHgMuJdm0NXNwLu6CkqS1J1he/U8meQ6mjb+Ah6oKpt6JGkKDZX4k/wL4A+BrwIBTk3y1qr66y6DkyStvGH/uHsl8MqqegggyQuBvwJM/JI0ZYZt4983m/RbO4B9HcQjSerYonf8Sc5vNzcnuRn4C5o2/l8C7ug4NklSB5Zq6hlcfeRR4Kfa7ceAZ3YSkUZicNUtB4VJ/bJo4q+qXx1VIBqtuatuSeqPYXv1nAr8OjAzeI7TMkvS9Bm2V8+NNIuo3AQ82V04kqSuDZv4v11VH+w0EknSSAyb+D+Q5N3Ap4HvzBZW1V2dRCVJ6sywif9HgQuAV/FUU0+1+5KkKTJs4v8l4AVVtb/LYCRJ3Rs28W8FjgX2dhjLRFsNi6GshjpIOnzDJv5jge1J7uB72/h7051zNSyGshrqIOnwDZv4391pFJKkkRl2Pv7Pdx2IJGk0hh25u4+n1th9OnAU8K2qek5XgUmSujHsHf8xs9tpVlx/PfCyroKSJHVn2Pn4v6saNwI/00E8kqSODdvUc/7A7hHARuDbnUQkSerUsL16BuflfwLYSdPcI0maMsO28a/qefldlGRpg4O/9uzZxcknn3LQ9szMidxww9VLvtbg73vYc
yStnKWWXvzNRZ6uqvrtFY5nLFyUZGlzB39t2HDw9s6dP7fg+YMGf9/DniNp5Sx1x/+tecqeDVwEPA9YFYlfkvpkqaUXr5zdTnIMcCnwq8CfA1cudJ4kaXIt2caf5HjgPwBvAa4Dzqqqr3cdmCSpG4v240/yO8AdwD7gR6vqPcMm/STXJNmbZOtA2fFJbk3yYPvzuMOKXpK0bEsN4LoM+H7gXcA/JXm8fexL8vgS514LnDen7HLgtqo6Dbit3ZckjdBSbfzLHtk7cO4XkszMKX49cG67fR3wOeA/Heo1JEnLd8iJ/RCdVFW72+09wEkjvr4k9d6wI3dXXFVVklro+SSbgE0A69evH1lcOnSDg7xgfIOzHCAmLW7Uif/RJGuraneStSyylGNVXQVcBbBx48YFPyA0OQYHecH4Bmc5QExa3Kibej4JXNhuXwh8YsTXl6Te6yzxJ/kI8PfAi5LsSnIRcAXwmiQPAj/d7kuSRqizpp6qevMCT726q2tKkpY26qYeSdKYmfglqWdM/JLUM2Prx6/Vb7Bfv/3ppclh4ldnBvv1259emhw29UhSz5j4JalnTPyS1DMmfknqGRO/JPWMiV+SesbEL0k9Yz9+jcTgYK4dOx5m3bqDj1loARUXVpFWlolfIzE4mGv79jPmPWahBVRcWEVaWTb1SFLPmPglqWdM/JLUMyZ+SeoZE78k9YyJX5J6xu6cmkjD9PuXdGhM/JpIw/T7l3RobOqRpJ4x8UtSz5j4JalnTPyS1DMmfknqGRO/JPWMiV+SembV9+MfZnEPBwiNz3IHag0e38WiLKNY9MWFZTRuqz7xD7O4hwOExme5A7UGj+9iUZZRLPriwjIaN5t6JKlnTPyS1DMmfknqmbG08SfZCewDDgBPVNXGccQhSX00zj/uvrKqvjbG60tSL9nUI0k9M67EX8Cnk9yZZNOYYpCkXhpXU88rquqRJCcCtybZXlVfGDyg/UDYBLB+/fpxxKgpcjgD8oY510FXWk3GcsdfVY+0P/cCHwfOnueYq6pqY1VtXLNmzahD1JSZHRS1bt1N7N//xIqfO3jM7AeANK1GnviTPDvJMbPbwGuBraOOQ5L6ahxNPScBH08ye/0/q6q/GUMcktRLI0/8VbUDePGorytJatidU5J6xsQvST1j4peknln18/EPWu6iH5psw/x7Dh6zZ88uTj75FGAy++I7VkCj0qvEv9xFPzTZhvn3nHvMhg2TuwCKC7RoVGzqkaSeMfFLUs+Y+CWpZ0z8ktQzJn5J6hkTvyT1jIlfknqmV/34peWY9AF/4xrw5UCz6WfilxYw6QP+xjXgy4Fm08+mHknqGRO/JPWMiV+SesbEL0k9Y+KXpJ4x8UtSz5j4Jaln7MevXjqcwVkLreq10PZig5yGiWNwwNTg6w4Tt4OtNB8Tv3rpcAZnLbSq10Lbiw1yGiaOwQFTc6+xFAdbaT429UhSz5j4JalnTPyS1DMmfknqGRO/JPWMiV+SesbunFLHBvvqN/tL979f7jiDweOH6a8/CYu4DI5JGDaOhc5f6NxpHMcwGDN0E7eJX+rYYF99GK7//XLHGQweP0x//UlYxGVwTMKwcSx0/kLnTuM4hsGYm/2Vj9umHknqGRO/JPWMiV+SemYsiT/JeUkeSPJQksvHEYMk9dXIE3+SI4HfB34WOAN4c5LlzZIlSTpk47jjPxt4qKp2VNV+4M+B148hDknqpXEk/nXAPw7s72rLJEkjkKoa7QWTNwLnVdW/a/cvAH68qi6Zc9wmYFO7+yLggWVc5gTgaysQ7jithjrA6qiHdZgM1mH5fqCq1swtHMcArkeA5w/sn9KWfY+qugq46lAukGRzVW08tPAmw2qoA6yOeliHyWAdVs44mnruAE5LcmqSpwP/CvjkGOKQpF4a+R1/VT2R5BLgFuBI4Jqq2jbqOCSpr8YyV09V3Qzc3OElDqmJaMKshjrA6qiHdZgM1mGFjPyPu5Kk8XLKBknqmVWX+
KdxOogk1yTZm2TrQNnxSW5N8mD787hxxriUJM9P8tkk9yXZluTStnxq6pHkGUm+nOTutg6/1ZafmuT29j3139tOCRMtyZFJvpLkU+3+VNUhyc4k9ybZkmRzWzY176VZSY5N8tEk25Pcn+Tlk1CPVZX4p3g6iGuB8+aUXQ7cVlWnAbe1+5PsCeCyqjoDeBlwcfu7n6Z6fAd4VVW9GDgTOC/Jy4D3Ab9bVT8IfB24aIwxDutS4P6B/Wmswyur6syB7o/T9F6a9QHgb6rqdODFNP8m469HVa2aB/By4JaB/XcA7xh3XEPGPgNsHdh/AFjbbq8FHhh3jMuszyeA10xrPYBnAXcBP04z4OZpbfn3vMcm8UEzNuY24FXAp4BMYR12AifMKZuq9xLwXOAfaP+WOkn1WFV3/Kyu6SBOqqrd7fYe4KRxBrMcSWaAlwC3M2X1aJtItgB7gVuBrwLfqKon2kOm4T31e8B/BJ5s95/H9NWhgE8nubMdxQ9T9l4CTgUeA/60bXb7kyTPZgLqsdoS/6pUza3BVHS/SnI08DHgbVX1+OBz01CPqjpQVWfS3DWfDZw+5pCWJcm/BPZW1Z3jjuUwvaKqzqJptr04yU8OPjkN7yWa7vJnAR+uqpcA32JOs8646rHaEv9Q00FMiUeTrAVof+5d4vixS3IUTdK/vqpuaIunrh4AVfUN4LM0zSLHJpkd8zLp76lzgJ9PspNm5ttX0bQzT1MdqKpH2p97gY/TfAhP23tpF7Crqm5v9z9K80Ew9nqstsS/mqaD+CRwYbt9IU2b+cRKEuBq4P6qev/AU1NTjyRrkhzbbj+T5m8U99N8ALyxPWyi61BV76iqU6pqhub9/7dV9RamqA5Jnp3kmNlt4LXAVqbovQRQVXuAf0zyorbo1cB9TEI9xv0HkA7+oPI64H/StM3+53HHM2TMHwF2A/+P5i7hIpp22duAB4HPAMePO84l6vAKmq+s9wBb2sfrpqkewI8BX2nrsBX4zbb8BcCXgYeAvwS+b9yxDlmfc4FPTVsd2ljvbh/bZv8fT9N7aaAuZwKb2/fUjcBxk1APR+5KUs+stqYeSdISTPyS1DMmfknqGRO/JPWMiV+SesbEr7FLcqCdhXH2cUiTVrUzOp7Qbn9pgWOuTfLG+Z5bKQP12dbO9HlZkiPa5zYm+eAi584k+ZUu45PGsgKXNMf/rWaahBVTVT+xkq+3TN+tT5ITgT8DngO8u6o20/TrXsgM8CvtOVInvOPXxJpzB78xyefa7aOT/Gk7X/s9SX5xnnO/2f5Mkg+lWaPhM8CJA8dsSPL5diKwWwaG0f9akjvau/WPJXlWW35tkg8m+VKSHcN8c6hmyoFNwCVtLOcOzJH/UwPfcr7Sjla9Avjnbdnb228A/yPJXe3jJ9pzz03yuYG53q9vR0+T5KVtjHenWV/gmHbyud9p63VPkrce+r+Mpt64R7b58AEc4KnRvluAN7XlO2mn5gU2Ap9rt98H/N7A+cfNc/w325/n08yyeSTw/cA3aKYuOAr4ErCmPe5NwDXt9vMGXvu9wK+329fSjHo9gma9h4cWqM835yn7Bs0sjOfy1Gjam4Bz2u2jab6Bf/f5tvxZwDPa7dOAze32ucD/oZl35wjg72lGTz8d2AG8tD3uOe3rbgLe1ZZ9H823jlPH/W/vYzwPm3o0CZbb1PPTNPPQAFBVX1/k2J8EPlJVB4B/SvK3bfmLgB8Bbm1vlI+kmTYD4EeSvBc4liYh3zLwejdW1ZPAfUkOdzrdLwLvT3I9cENV7WpjGXQU8KEkZ9J8QP7QwHNfrqpdAGmmkp6h+TDYXVV3AFQ7Q2qS1wI/NvAt5bk0HyT/cJh10BQy8WuSPcFTzZHPWOHXDrCtql4+z3PXAm+oqruT/Buau+tZ35nzGktfKHkBTdLeC/zwbHlVXZHkr2jmNPpikp+Z5/S3A4/SrN50BPDtBWI5wOL/n0PzzeWWRY5RT9jGr0m2E9jQbg+2498KX
Dy7k8XXLP0C8Ka2jXst8Mq2/AFgTZKXt69xVJJ/1j53DLA7zTTTbzmcCiRZA/wh8KGqqjnPvbCq7q2q99HMLHs6sK+9/qzn0tzBPwlcQPPNZDEPAGuTvLS9xjFppmO+Bfj3bZ1I8kPtzJfqIRO/JsEz53TnvKIt/y3gA2kW2z4wcPx7geOSbE1yN08l8/l8nGYWxPuA/0bTFk5V7adp639f+xpbgNmeQL9Bs3rYF4Hth1GfbTSzL366rctcb2vrcA/NzKx/TTOL44H2D7NvB/4AuLCN8XSaxTwW1NbrTcB/bc+5lebb0p+0v4O7kmwF/gi/8feWs3NKUs94xy9JPWPil6SeMfFLUs+Y+CWpZ0z8ktQzJn5J6hkTvyT1jIlfknrm/wMEkwrQ/br6XAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "_ = plt.hist(dis, bins=100, facecolor=\"blue\", edgecolor=\"black\", alpha=0.7)\n", - "plt.xlabel(\"Euclidean Distance\")\n", - "plt.ylabel(\"Number\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Good Case\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "print(\"Good Case\")\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "idx = dis.argmin()\n", - "\n", - "img = cv2.imread(file_names[idx])[:,:,::-1]\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.title(\"Loc Result\")\n", - "plt.imshow(img)\n", - "plt.scatter(*points[idx], c='r', s=150)\n", - "plt.scatter(*pred_idx[idx], c='b', s=120, marker='*')\n", - "\n", - "plt.subplot(1,2,2)\n", - "plt.title(\"Pred Heatmap\")\n", - "plt.imshow(pred_hms[idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bad Case\n", - "../datasets/Training100/ASOCT_Image_loc/T0035-09_left.jpg\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "print(\"Bad Case\")\n", - "\n", - "plt.figure(figsize=(8,5))\n", - "\n", - "idx = dis.argmax()\n", - "print(file_names[idx])\n", - "\n", - "img = cv2.imread(file_names[idx])[:,:,::-1]\n", - "\n", - "plt.subplot(1,2,1)\n", - "plt.title(\"Loc Result\")\n", - "plt.imshow(img)\n", - "plt.scatter(*points[idx], c='r', s=150)\n", - "plt.scatter(*pred_idx[idx], c='b', s=120, marker='*')\n", - "\n", - "plt.subplot(1,2,2)\n", - "plt.title(\"Pred Heatmap\")\n", - "plt.imshow(pred_hms[idx])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "result = {}\n", - "for file, pred_id in zip(file_names, pred_idx):\n", - " # T0055-15_right.jpg -> T0055-15\n", - " ori_name = file.split(\"/\")[-1].split(\"_\")[0]\n", - " if ori_name not in result.keys():\n", - " result[ori_name] = [[-1, -1],[-1, -1]]\n", - "\n", - " if \"left\" in file: result[ori_name][0] = pred_id\n", - " else: \n", - " image = cv2.imread(os.path.join(origin_data_dir, ori_name+\".jpg\"))\n", - " nrows, ncols, ch = image.shape\n", - " result[ori_name][1] = [(ncols - nrows) + pred_id[0], pred_id[1]]" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# output_file do not contain coordinates==-1 images\n", - "# Due to the preprocessing step\n", - "# For real inference, keep all images in val_data_list.\n", - "\n", - "with open(output_file, \"w+\") as f:\n", - " f.write(\"{},{},{},{},{}\\n\".format(\"ASOCT_NAME\", \"X_LEFT\", \"Y_LEFT\", \"X_RIGHT\", \"Y_RIGHT\"))\n", - " for file, pred_point in result.items():\n", - " f.write(\"{},{},{},{},{}\\n\".format(file+\".jpg\", *pred_point[0], *pred_point[1]))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 
3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/README.md b/PaddleCV/Research/AGEchallenge/LocalizationUNet/README.md deleted file mode 100644 index 5598f2663b63d3b3567002282e3a5c8908a77af3..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Angle closure Glaucoma Evaluation Challenge -The goal of the challenge is to evaluate and compare automated algorithms for angle closure classification and localization of scleral spur (SS) points on a common dataset of AS-OCT images. We invite the medical image analysis community to participate by developing and testing existing and novel automated classification and segmentation methods. -More detail [AGE challenge](https://age.grand-challenge.org/Details/). - -## Scleral spur localization task (ResNet50+UNet model) - -1. Method - - * Inspired by UNet method, a keypoint is equivalent to 2D gaussian heatmap. - - - - - * Then, a localization task could be transformed to a heatmap regression task. - -2. Prepare data - - * We assume that you have downloaded data(two zip files), and store @ `../datasets/`. - * (Updated on August 5) Replace update files. - * We provide a demo about `zip file extract`, `data structure explore`, and `Train/Val split`. - -3. Train - - * We assume that you have download data, extract compressed files, and store @ `../datasets/`. 
- * Based on PaddlePaddle and [ResNet50](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/models/resnet.py), we modify the model structure to enable UNet model, which global pooling layer and final fc layer were removed. - -4. Inference - - * We assume that you have download data, extract compressed files, and store @ `../datasets/`. - * We assume that you stored checkpoint files @ `../weights/loc_unet` - * We provide a baseline about `inference` and `visualization`. - - - - diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/1.png b/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/1.png deleted file mode 100644 index be44b9eea1b002f79e86396013a530cca3d4bf88..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/1.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/2.png b/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/2.png deleted file mode 100644 index b1a90b26998f9f4540bd15d6a1faaa597d094b7d..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/2.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/3.png b/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/3.png deleted file mode 100644 index dfd1a015da77950a1ce789b4bba895209609296b..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/3.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/4.png b/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/4.png deleted file mode 100644 index beae1c839714206b38474aba5e851320661de6af..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/AGEchallenge/LocalizationUNet/assets/4.png and /dev/null differ diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/res_unet_paddle.py 
b/PaddleCV/Research/AGEchallenge/LocalizationUNet/res_unet_paddle.py deleted file mode 100644 index a64da38da7e7f20dcc45e5d87e3aa66826d6a2f4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/res_unet_paddle.py +++ /dev/null @@ -1,62 +0,0 @@ -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers as FL -from paddle.fluid.param_attr import ParamAttr - -from resnet import * - -def conv_bn_layer(input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = FL.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') - - bn_name = name + "_bn" - return FL.batch_norm(input=conv, - act=act, - name=bn_name+'.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance',) - -def DoubleConv_up(x, out_channels, name=None): - x = conv_bn_layer(x, out_channels, 3, 1, act='relu', name=name+"1") - x = conv_bn_layer(x, out_channels, 3, 1, act='relu', name=name+"2") - return x - - -def ConvUp(x1, x2, out_channels, name=None): - x1 = FL.conv2d_transpose(x1, num_filters=x1.shape[1] // 2, filter_size=2, stride=2) - x = FL.concat([x1,x2], axis=1) - x = DoubleConv_up(x, out_channels, name=name+"_doubleconv") - return x - -class ResUNet(): - def __init__(self, backbone, out_channels): - self.backbone = backbone - self.out_channels = out_channels - - def net(self, input): - c1, c2, c3, c4, c5 = self.backbone(input) - channels = [64, 128, 256, 512] - x = ConvUp(c5, c4, channels[2], name='up5') - x = ConvUp(x, c3, channels[1], name='up6') - x = ConvUp(x, c2, channels[0], name='up7') - x = ConvUp(x, c1, channels[0], name='up8') - x = FL.conv2d_transpose(x, 
num_filters=self.out_channels, filter_size=2, stride=2) - - return x \ No newline at end of file diff --git a/PaddleCV/Research/AGEchallenge/LocalizationUNet/resnet.py b/PaddleCV/Research/AGEchallenge/LocalizationUNet/resnet.py deleted file mode 100644 index da4d53958fea871a228e6a9c44b9e23a28bf4047..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/LocalizationUNet/resnet.py +++ /dev/null @@ -1,188 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers - - def net(self, input): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - unet_collector = [] - - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu',name="conv1") - unet_collector.append(conv) - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name="res"+str(block+2)+"a" - else: - conv_name="res"+str(block+2)+"b"+str(i) - else: - conv_name="res"+str(block+2)+chr(97+i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, name=conv_name) - unet_collector.append(conv) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name="res"+str(block+2)+chr(97+i) - conv = 
self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block==i==0, - name=conv_name) - unet_collector.append(conv) - return unet_collector - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - name=bn_name+'.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance',) - - def shortcut(self, input, ch_out, stride, is_first, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name+"_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") - - short = self.shortcut(input, num_filters * 4, stride, is_first=False, name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name): - conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=3, act='relu', 
stride=stride, - name=name+"_branch2a") - conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, act=None, - name=name+"_branch2b") - short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18(): - model = ResNet(layers=18) - return model - - -def ResNet34(): - model = ResNet(layers=34) - return model - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def ResNet152(): - model = ResNet(layers=152) - return model diff --git a/PaddleCV/Research/AGEchallenge/README.md b/PaddleCV/Research/AGEchallenge/README.md deleted file mode 100644 index c237374f1f48be26935fdfde45e2d065fb4d9416..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/AGEchallenge/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Angle closure Glaucoma Evaluation Challenge -The goal of the challenge is to evaluate and compare automated algorithms for angle closure classification and localization of scleral spur (SS) points on a common dataset of AS-OCT images. We invite the medical image analysis community to participate by developing and testing existing and novel automated classification and segmentation methods. -More detail [AGE challenge](https://age.grand-challenge.org/Details/). - -## 1.Download data -After you sign up `Grand Challenge` and join the [AGE challenge](https://age.grand-challenge.org/Details/). 
- -Dataset can be downloaded from the [Download page](https://age.grand-challenge.org/Download/) - -We assume `Training100.zip` and `Validation_ASOCT_Image.zip` are stored @ `./AGE_challenge Baseline/datasets/` - -## 2.Environment installation -* Python >= 3.5 -* cuDNN >= 7.3 -* CUDA 9 -* paddlepaddle-gpu >= 1.5.0 -* xlrd == 1.2.0 -* tqdm == 4.32.2 -* pycocotools == 2.0.0 - -More detail [PaddlePaddle Installation Manuals](https://www.paddlepaddle.org.cn/documentation/docs/en/1.5/beginners_guide/install/index_en.html) - -## 3. Angle closure classification task - -See `Classification/`. - -## 4. Scleral spur localization task - -We provide two baseline models for localization task. - -See `LocalizationFCN/` and `LocalizationRCNN/`. \ No newline at end of file diff --git a/PaddleCV/Research/PWCNet/AverageMeter.py b/PaddleCV/Research/PWCNet/AverageMeter.py deleted file mode 100644 index 633e6c067d465559d2da61913342da2e521ac731..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/AverageMeter.py +++ /dev/null @@ -1,18 +0,0 @@ - - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self): - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count diff --git a/PaddleCV/Research/PWCNet/README.md b/PaddleCV/Research/PWCNet/README.md deleted file mode 100644 index b3335013b641836c47b61dd31f8a6f5459188254..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/README.md +++ /dev/null @@ -1,86 +0,0 @@ -# PWCNet reimplement using paddlepaddle DyGraph -PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume. -# Environment -``` -cenntos7 -paddle develop version (after 20191201) install from source -python3.7 -SciPy 1.1.0 -``` -code will update for paddle v1.7 later. 
-# Compile correlation op -``` -cd correlation_op -sh make.sh -``` -# Datasets -1.Please download the `FlyingChairs dataset` and `FlyingChairs_train_val.txt` from https://lmb.informatik.uni-freiburg.de/resources/datasets - -Or you can use `./data/download.sh` to download datasets. - -We split the data to train and val by using `FlyingChairs_train_val.txt` with `1 for train and 2 for val`. -# Inference -Note that the paddle models `pwc_net_paddle.pdparams` and `pwc_net_chairs_paddle.pdparams` are transferred from the pytorch pth files `pwc_net.pth.tar` and `pwc_net_chairs.pth.tar`. - -Run -``` -python infer.py -``` - -| Input img1 | Input img2 | -|-------|------------| -| | | - -|prediction with pwc_net_paddle.pdparams| prediction with pwc_net_chairs_paddle.pdparams| -|-------------|-------------| -| | | - -# First Train with L2 loss -A single gpu is supported. Multi gpus will be supported later. - -You should check parameters in `my_args.py` as you like. - -And change them in `train.sh`. -``` ---data_root ---train_val_txt ---batch_size -``` -Then run -``` -./train.sh -``` -Some results during training can be seen -``` -./img1.png -./img2.png -./hsv_pd.png # ground truth -./hsv_predict.png # output of model -``` - -# Finetune with L1 loss -finetune from your best pretrain model by adding --pretrained your_best_model_name eg. `--pretrained epoch_7_pwc_net_paddle` - -Run -``` -./finetune.sh -``` -# Note -This code reimplement PWCNet like the code of `https://github.com/NVlabs/PWC-Net` -If you want to want to train like the paper -``` -@InProceedings{Sun2018PWC-Net, - author = {Deqing Sun and Xiaodong Yang and Ming-Yu Liu and Jan Kautz}, - title = {{PWC-Net}: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume}, - booktitle = CVPR, - year = {2018}, -} -``` -Please use all the datasets in `./data/download.sh` if you like. And use the code in `./data/datasets.py`. 
- -Reference works -``` -https://github.com/NVlabs/PWC-Net -https://github.com/ClementPinard/FlowNetPytorch -https://github.com/NVIDIA/flownet2-pytorch/blob/master/datasets.py -``` \ No newline at end of file diff --git a/PaddleCV/Research/PWCNet/__init__.py b/PaddleCV/Research/PWCNet/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/PWCNet/correlation_op/README.md b/PaddleCV/Research/PWCNet/correlation_op/README.md deleted file mode 100644 index d83c6fe61d6fef1d01139289b69605628e689d72..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/README.md +++ /dev/null @@ -1,14 +0,0 @@ -自定义OP编译: -1. 使用paddle develop 12月1日之后的版本 -2. sh make.sh编译成correlation_lib.so动态库 -3. 添加动态库路径到LD_LIBRARY_PATH: -``` -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())'` -``` -4. 添加correlation op的python路径: -``` -export PYTHONPATH=$PYTHONPATH:`pwd` -``` -5. python test_correlation.py运行单测,验证是否加载成功。 - -PS: 如果paddle whl包是从官网上下载的,需要使用gcc 4.8,即把make.sh中的g++ 改为 g++-4.8 diff --git a/PaddleCV/Research/PWCNet/correlation_op/correlation.py b/PaddleCV/Research/PWCNet/correlation_op/correlation.py deleted file mode 100644 index 05e9267d1fcb51344e096592ad86d22223b99f75..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/correlation.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import paddle.fluid as fluid -import os -file_dir = os.path.dirname(os.path.abspath(__file__)) -fluid.load_op_library(os.path.join(file_dir, 'correlation_lib.so')) - -from paddle.fluid.layer_helper import LayerHelper - -def correlation(input1, input2, pad_size, kernel_size, max_displacement, stride1, stride2, corr_type_multiply=1): - helper = LayerHelper("correlation", **locals()) - output = helper.create_variable_for_type_inference(dtype=input1.dtype) - helper.append_op(type="correlation", inputs={"Input1": input1, "Input2": input2}, attrs={"pad_size": pad_size, "kernel_size": kernel_size, "max_displacement": max_displacement, "stride1": stride1, "stride2": stride2, "corr_type_multiply": corr_type_multiply}, outputs = {"Output": output}) - return output diff --git a/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cc b/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cc deleted file mode 100644 index 4902db3ed7115d0d315ae2f2cbab5ea1a5ee6528..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cc +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; - -inline std::vector CorrelationOutputSize(int batch, int input_height, int input_width, int stride1, int stride2, int kernel_size, int pad_size, int max_displacement) { - - std::vector output_shape({batch}); - int kernel_radius = (kernel_size - 1) / 2; - int border_radius = kernel_radius + max_displacement; - int padded_input_height = input_height + 2 * pad_size; - int padded_input_width = input_width + 2 * pad_size; - int output_channel = ((max_displacement/stride2) * 2 + 1) * ((max_displacement/stride2) * 2 + 1); - output_shape.push_back(output_channel); - int output_height = std::ceil(static_cast(padded_input_height - 2 * border_radius) / static_cast(stride1)); - int output_width = std::ceil(static_cast(padded_input_width - 2 * border_radius) / static_cast(stride1)); - output_shape.push_back(output_height); - output_shape.push_back(output_width); - return output_shape; -} - -class CorrelationOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override{ - AddInput("Input1", "input1"); - AddInput("Input2", "input2"); - AddOutput("Output", "output"); - AddAttr("pad_size", "pad size for input1 and input2"); - AddAttr("kernel_size", "kernel size of input1 and input2"); - AddAttr("max_displacement", "max displacement of input1 and input2"); - AddAttr("stride1", "Input1 stride"); - AddAttr("stride2", "Input2 stride"); - AddAttr("corr_type_multiply", "correlation coefficient").SetDefault(1); - AddComment(R"DOC(Correlation of two feature map. 
Only support NCHW data format.)DOC"); - } -}; - -class CorrelationOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override{ - PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(input1) cannot be null"); - PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(input2) cannot be null"); - int stride1 = ctx->Attrs().Get("stride1"); - int stride2 = ctx->Attrs().Get("stride2"); - int max_displacement = ctx->Attrs().Get("max_displacement"); - int pad_size = ctx->Attrs().Get("pad_size"); - int kernel_size = ctx->Attrs().Get("kernel_size"); - - auto in_dims = ctx->GetInputDim("Input1"); - auto in2_dims = ctx->GetInputDim("Input2"); - PADDLE_ENFORCE_EQ(in_dims.size() == 4, true, "input1 must be 4-dims"); - PADDLE_ENFORCE_EQ(in2_dims.size() == 4, true, "input2 must be 4-dims"); - std::vector output_shape = CorrelationOutputSize(in_dims[0], in_dims[2], in_dims[3], stride1, stride2, kernel_size, pad_size, max_displacement); - ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override{ - auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ(input_data_type, ctx.Input("Input2")->type(), "Input1 and Input2 shoule have same type"); - return framework::OpKernelType(input_data_type, ctx.GetPlace()); - } -}; - -template -class CorrelationOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - std::unique_ptr Apply() const override { - auto* op = new T(); - op->SetType("correlation_grad"); - op->SetInput("Input1", this->Input("Input1")); - op->SetInput("Input2", this->Input("Input2")); - op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output")); - 
op->SetOutput(framework::GradVarName("Input1"), this->InputGrad("Input1")); - op->SetOutput(framework::GradVarName("Input2"), this->InputGrad("Input2")); - op->SetAttrMap(this->Attrs()); - - return std::unique_ptr(op); - } -}; - -class CorrelationOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext* ctx) const override{ - PADDLE_ENFORCE_EQ(ctx->HasInput("Input1"), true, "Input(Input1) should not be null"); - PADDLE_ENFORCE_EQ(ctx->HasInput("Input2"), true, "Input(Input2) should not be null"); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Output")), true, "Input(Output@GRAD) should not be null"); - - auto in1_dims = ctx->GetInputDim("Input1"); - auto in2_dims = ctx->GetInputDim("Input2"); - ctx->SetOutputDim(framework::GradVarName("Input1"), in1_dims); - ctx->SetOutputDim(framework::GradVarName("Input2"), in1_dims); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override{ - const auto* var = ctx.InputVar(framework::GradVarName("Output")); - if (var == nullptr) { - PADDLE_THROW("cannot find Output@GRAD"); - } - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(ctx, "Input1"), ctx.GetPlace()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(correlation, ops::CorrelationOp, ops::CorrelationOpMaker, - ops::CorrelationOpGradMaker, - ops::CorrelationOpGradMaker); -REGISTER_OPERATOR(correlation_grad, ops::CorrelationOpGrad); diff --git a/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cu b/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cu deleted file mode 100644 index 161844430fe4b9dfeaf80dbe127d802d67a6de76..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/correlation_op.cu +++ /dev/null @@ -1,434 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle 
Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "paddle/fluid/framework/op_registry.h" - -#define THREADS_PER_BLOCK 32 -#define FULL_MASK 0xffffffff - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; - -template -__forceinline__ __device__ T warpReduceSum(T val) { - for (int offset = 16; offset > 0; offset /= 2) { - val += __shfl_down_sync(FULL_MASK, val, offset); - } - return val; -} - -template -__forceinline__ __device__ T blockReduceSum(T val) { - static __shared__ T shared[32]; - int lane = threadIdx.x % warpSize; - int wid = threadIdx.x / warpSize; - - val = warpReduceSum(val); - if (lane == 0) - shared[wid] = val; - - __syncthreads(); - val = (threadIdx.x < blockDim.x / warpSize) ? 
shared[lane] : 0; - - if (wid == 0) - val = warpReduceSum(val); - - return val; -} - -template -__global__ void set_zero(T *x, int num) { - for(int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += blockDim.x * gridDim.x) - x[i] = static_cast(0); -} - -template -__global__ void channel_first(const T *input, T *rinput, const int channel, const int height, const int width, const int pad_size) { - int n = blockIdx.x; - int h = blockIdx.y; - int w = blockIdx.z; - - int ch_off = threadIdx.x; - T value; - int dimchw = channel * height * width; - int dimhw = height * width; - - int p_dimw = (width + 2 * pad_size); - int p_dimh = (height + 2 * pad_size); - int p_dimchw = channel * p_dimw * p_dimh; - int p_dimcw = channel * p_dimw; - - for (int c = ch_off; c < channel; c += THREADS_PER_BLOCK) { - value = input[n * dimchw + c * dimhw + h * width + w]; - rinput[n * p_dimchw + (h + pad_size) * p_dimcw + (w + pad_size) * channel + c] = value; - } -} - -template -__global__ void correlation_forward(T *output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int input_channel, const int input_height, const int input_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) { - - int p_input_width = input_width + 2 * pad_size; - int p_input_height = input_height + 2 * pad_size; - - int kernel_rad = (kernel_size - 1) / 2; - int displacement_rad = max_displacement / stride2; - - int displacement_size = 2 * displacement_rad + 1; - - int n = blockIdx.x; - int h1 = blockIdx.y * stride1 + max_displacement; - int w1 = blockIdx.z * stride1 + max_displacement; - int c = threadIdx.x; - - int p_dimchw = p_input_height * p_input_width * input_channel; - int p_dimcw = p_input_width * input_channel; - int p_dimc = input_channel; - - int t_dimchw = output_channel * output_height * output_width; - int t_dimhw = output_height * output_width; - int t_dimw = 
output_width; - - int nelems = kernel_size * kernel_size * p_dimc; - - for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) { - for(int ti = -displacement_rad; ti <= displacement_rad; ++ti) { - int w2 = w1 + ti * stride2; - int h2 = h1 + tj * stride2; - - T acc0 = 0; - for(int j = -kernel_rad; j <= kernel_rad; ++j) { - for(int i = -kernel_rad; i <= kernel_rad; ++i) { - for(int ch = c; ch < p_dimc; ch += blockDim.x) { - int index1 = n * p_dimchw + (h1 + j) * p_dimcw + (w1 + i) * p_dimc + ch; - int index2 = n * p_dimchw + (h2 + j) * p_dimcw + (w2 + i) * p_dimc + ch; - acc0 += static_cast(rinput1[index1] * rinput2[index2]); - } - } - } - if (blockDim.x == warpSize) { - __syncwarp(); - acc0 = warpReduceSum(acc0); - } else { - __syncthreads(); - acc0 = blockReduceSum(acc0); - } - - if (threadIdx.x == 0) { - int tc = (tj + displacement_rad) * displacement_size + (ti + displacement_rad); - const int t_index = n * t_dimchw + tc * t_dimhw + blockIdx.y * t_dimw + blockIdx.z; - output[t_index] = static_cast(acc0 / nelems); - } - } - } - -} - -//class CorrelationKernel -template -class CorrelationKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must be CUDAPlace"); - - auto *input1 = ctx.Input("Input1"); - auto *input2 = ctx.Input("Input2"); - int pad_size = ctx.Attr("pad_size"); - int kernel_size = ctx.Attr("kernel_size"); - int stride1 = ctx.Attr("stride1"); - int stride2 = ctx.Attr("stride2"); - int max_displacement = ctx.Attr("max_displacement"); - int corr_type_multiply = ctx.Attr("corr_type_multiply"); - - auto *output = ctx.Output("Output"); - output->mutable_data(ctx.GetPlace()); - auto &dev_ctx = ctx.template device_context(); - - // base on input1, NCHW - auto in_dims = input1->dims(); - int N = in_dims[0]; - int C = in_dims[1]; - int H = in_dims[2]; - int W = in_dims[3]; - - int padded_input_height = H + 2 * pad_size; 
- int padded_input_width = W + 2 * pad_size; - - Tensor rinput1 = ctx.AllocateTmpTensor({N, padded_input_height, padded_input_width, C}, dev_ctx); - rinput1.mutable_data(ctx.GetPlace()); - - Tensor rinput2 = ctx.AllocateTmpTensor({N, padded_input_height, padded_input_width, C}, dev_ctx); - rinput2.mutable_data(ctx.GetPlace()); - - set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data(), rinput1.numel()); - set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data(), rinput2.numel()); - set_zero<<<(output->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(output->data(), output->numel()); - - auto out_dims = output->dims(); - int OC = out_dims[1]; - int OH = out_dims[2]; - int OW = out_dims[3]; - - dim3 blocks_grid(N, H, W); - dim3 threads_block(THREADS_PER_BLOCK); - - channel_first<<>>(input1->data(), rinput1.data(), C, H, W, pad_size); - channel_first<<>>(input2->data(), rinput2.data(), C, H, W, pad_size); - - dim3 threadsPerBlock(THREADS_PER_BLOCK); - dim3 totalBlocksCorr(N, OH, OW); - - correlation_forward<<>>(output->data(), OC, OH, OW, rinput1.data(), -C, H, W, rinput2.data(), pad_size, kernel_size, max_displacement, stride1, stride2); - } -}; - -template -__global__ void correlation_backward_input1(int item, T *grad_input1, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput2, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2) { - - int n = item; - int h = blockIdx.x * stride1 + pad_size; - int w = blockIdx.y * stride1 + pad_size; - int c = blockIdx.z; - int tch_off = threadIdx.x; - - int kernel_rad = (kernel_size - 1) / 2; - int displacement_rad = max_displacement / stride2; - int displacement_size = 2 * displacement_rad + 1; - - int xmin = (w - kernel_rad - max_displacement) / stride1; - int ymin = (h - 
kernel_rad - max_displacement) / stride1; - - int xmax = (w + kernel_rad - max_displacement) / stride1; - int ymax = (h + kernel_rad - max_displacement) / stride1; - - if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) { - return; - } - - if (xmin > xmax || ymin > ymax) { - return; - } - - xmin = max(0, xmin); - xmax = min(output_width - 1, xmax); - - ymin = max(0, ymin); - ymax = min(output_height - 1, ymax); - - int p_input_width = input_width + 2 * pad_size; - int p_input_height = input_height + 2 * pad_size; - int p_dimchw = input_channel * p_input_height * p_input_width; - int p_dimcw = input_channel * p_input_width; - int p_dimc = input_channel; - - int t_dimchw = output_channel * output_height * output_width; - int t_dimhw = output_height * output_width; - int t_dimw = output_width; - - int o_dimchw = input_channel * input_height * input_width; - int o_dimhw = input_height * input_width; - int o_dimw = input_width; - - int nelems = kernel_size * kernel_size * input_channel; - - __shared__ T prod_sum[THREADS_PER_BLOCK]; - prod_sum[tch_off] = 0; - - for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) { - int i2 = (tc % displacement_size - displacement_rad) * stride2; - int j2 = (tc / displacement_size - displacement_rad) * stride2; - - int index2 = n * p_dimchw + (h + j2) * p_dimcw + (w + i2) * p_dimc + c; - - T val2 = rinput2[index2]; - for (int j = ymin; j <= ymax; ++j) { - for (int i = xmin; i <= xmax; ++i) { - int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i; - prod_sum[tch_off] += grad_output[t_index] * val2; - } - } - } - - __syncthreads(); - - if (tch_off == 0) { - T reduce_sum = 0; - for (int index = 0; index < THREADS_PER_BLOCK; index++) { - reduce_sum += prod_sum[index]; - } - const int index1 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size); - grad_input1[index1] = static_cast(reduce_sum / nelems); - } - -} - -template -__global__ void correlation_backward_input2(int item, T 
*grad_input2, const int input_channel, const int input_height, const int input_width, const T *grad_output, const int output_channel, const int output_height, const int output_width, const T *rinput1, const int pad_size, const int kernel_size, const int max_displacement, const int stride1, const int stride2){ - - int n = item; - int h = blockIdx.x * stride1 + pad_size; - int w = blockIdx.y * stride1 + pad_size; - int c = blockIdx.z; - - int tch_off = threadIdx.x; - - int kernel_rad = (kernel_size - 1) / 2; - int displacement_rad = max_displacement / stride2; - int displacement_size = 2 * displacement_rad + 1; - - int p_input_width = input_width + 2 * pad_size; - int p_input_height = input_height + 2 * pad_size; - int p_dimchw = input_channel * p_input_height * p_input_width; - int p_dimcw = input_channel * p_input_width; - int p_dimc = input_channel; - - int t_dimchw = output_channel * output_height * output_width; - int t_dimhw = output_height * output_width; - int t_dimw = output_width; - - int o_dimchw = input_channel * input_height * input_width; - int o_dimhw = input_height * input_width; - int o_dimw = input_width; - - int nelems = kernel_size * kernel_size * input_channel; - - __shared__ T prod_sum[THREADS_PER_BLOCK]; - prod_sum[tch_off] = 0; - - for (int tc = tch_off; tc < output_channel; tc += THREADS_PER_BLOCK) { - int i2 = (tc % displacement_size - displacement_rad) * stride2; - int j2 = (tc / displacement_size - displacement_rad) * stride2; - - int xmin = (w - kernel_rad - max_displacement - i2) / stride1; - int ymin = (h - kernel_rad - max_displacement - j2) / stride1; - - int xmax = (w + kernel_rad - max_displacement - i2) / stride1; - int ymax = (h + kernel_rad - max_displacement - j2) / stride1; - - if (xmax < 0 || ymax < 0 || xmin >= output_width || ymin >= output_height) { - continue; - } - - if (xmin > xmax || ymin > ymax) { - continue; - } - - xmin = max(0, xmin); - xmax = min(output_width - 1, xmax); - - ymin = max(0, ymin); - ymax = 
min(output_height - 1, ymax); - - int index1 = n * p_dimchw + (h - j2) * p_dimcw + (w - i2) * p_dimc + c; - T val1 = rinput1[index1]; - for (int j = ymin; j <= ymax; ++j) { - for (int i = xmin; i <= xmax; ++i) { - int t_index = n * t_dimchw + tc * t_dimhw + j * t_dimw + i; - prod_sum[tch_off] += grad_output[t_index] * val1; - } - } - } - - __syncthreads(); - - if (tch_off == 0) { - T reduce_sum = 0; - for (int index = 0; index < THREADS_PER_BLOCK; index++) { - reduce_sum += prod_sum[index]; - } - const int index2 = n * o_dimchw + c * o_dimhw + (h - pad_size) * o_dimw + (w - pad_size); - grad_input2[index2] = static_cast(reduce_sum / nelems); - } -} - -template -class CorrelationGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, "It must use CUDAPlace."); - const auto *input1 = ctx.Input("Input1"); - const auto *input2 = ctx.Input("Input2"); - const auto *grad_output = ctx.Input(framework::GradVarName("Output")); - const int pad_size = ctx.Attr("pad_size"); - const int kernel_size = ctx.Attr("kernel_size"); - const int stride1 = ctx.Attr("stride1"); - const int stride2 = ctx.Attr("stride2"); - const int max_displacement = ctx.Attr("max_displacement"); - const int corr_type_multiply = ctx.Attr("corr_type_multiply"); - - auto *grad_input1 = ctx.Output(framework::GradVarName("Input1")); - grad_input1->mutable_data(ctx.GetPlace()); - auto *grad_input2 = ctx.Output(framework::GradVarName("Input2")); - grad_input2->mutable_data(ctx.GetPlace()); - auto &dev_ctx = ctx.template device_context(); - - auto in_dims = input1->dims(); - int N = in_dims[0]; - int C = in_dims[1]; - int H = in_dims[2]; - int W = in_dims[3]; - - int padded_input_height = H + 2 * pad_size; - int padded_input_width = W + 2 * pad_size; - - Tensor rinput1 = ctx.AllocateTmpTensor({N, padded_input_height, padded_input_width, C}, dev_ctx); - 
rinput1.mutable_data(ctx.GetPlace()); - - Tensor rinput2 = ctx.AllocateTmpTensor({N, padded_input_height, padded_input_width, C}, dev_ctx); - rinput2.mutable_data(ctx.GetPlace()); - - set_zero<<<(rinput1.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput1.data(), rinput1.numel()); - set_zero<<<(rinput2.numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(rinput2.data(), rinput2.numel()); - set_zero<<<(grad_input1->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input1->data(), grad_input1->numel()); - set_zero<<<(grad_input2->numel() + 512 - 1)/512, 512, 0, dev_ctx.stream()>>>(grad_input2->data(), grad_input2->numel()); - - auto grad_out_dims = grad_output->dims(); - int GOC = grad_out_dims[1]; - int GOH = grad_out_dims[2]; - int GOW = grad_out_dims[3]; - - dim3 blocks_grid(N, H, W); - dim3 threads_block(THREADS_PER_BLOCK); - - channel_first<<>>(input1->data(), rinput1.data(), C, H, W, pad_size); - channel_first<<>>(input2->data(), rinput2.data(), C, H, W, pad_size); - - dim3 threadsPerBlock(THREADS_PER_BLOCK); - dim3 totalBlocksCorr(H, W, C); - - for (int n = 0; n < N; n++) { - correlation_backward_input1<<>>(n, grad_input1->data(), C, H, W, grad_output->data(), GOC, GOH, GOW, rinput2.data(), pad_size, kernel_size, max_displacement, stride1, stride2); - } - - for (int n = 0; n < N; n++) { - correlation_backward_input2<<>>(n, grad_input2->data(), C, H, W, grad_output->data(), GOC, GOH, GOW, rinput1.data(), pad_size, kernel_size, max_displacement, stride1, stride2); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - correlation, ops::CorrelationKernel, - ops::CorrelationKernel); -REGISTER_OP_CUDA_KERNEL( - correlation_grad, ops::CorrelationGradKernel, - ops::CorrelationGradKernel); - diff --git a/PaddleCV/Research/PWCNet/correlation_op/make.sh b/PaddleCV/Research/PWCNet/correlation_op/make.sh deleted file mode 100644 index 
0aa8deb6b3db2908838dbba10b976e37979bf231..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/make.sh +++ /dev/null @@ -1,22 +0,0 @@ -include_dir=$( python3.7 -c 'import paddle; print(paddle.sysconfig.get_include())' ) -lib_dir=$( python3.7 -c 'import paddle; print(paddle.sysconfig.get_lib())' ) - -echo $include_dir -echo $lib_dir - -OPS='correlation_op' -for op in ${OPS} -do -nvcc ${op}.cu -c -o ${op}.cu.o -ccbin cc -DPADDLE_WITH_CUDA -DEIGEN_USE_GPU -DPADDLE_USE_DSO -DPADDLE_WITH_MKLDNN -Xcompiler -fPIC -std=c++11 -Xcompiler -fPIC -w --expt-relaxed-constexpr -O0 -g -DNVCC \ - -I ${include_dir}/third_party/ \ - -I ${include_dir} -done - -##g++-4.8 correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \ -g++ correlation_op.cu.o correlation_op.cc -o correlation_lib.so -DPADDLE_WITH_MKLDNN -shared -fPIC -std=c++11 -O0 -g \ - -I ${include_dir}/third_party/ \ - -I ${include_dir} \ - -L ${lib_dir} \ - -L /usr/local/cuda/lib64 -lpaddle_framework -lcudart - -rm *.cu.o diff --git a/PaddleCV/Research/PWCNet/correlation_op/test_correlation.py b/PaddleCV/Research/PWCNet/correlation_op/test_correlation.py deleted file mode 100644 index 89e254adafe41465be93f98cef837cc6514bf9db..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/correlation_op/test_correlation.py +++ /dev/null @@ -1,88 +0,0 @@ -import unittest -from correlation import correlation -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.dygraph.base import to_variable - -def corr(x_1, x_2, pad_size=4, kernel_size=1, max_displacement=4, stride1=1, stride2=1, corr_multiply=1): - K = kernel_size - # rinput1 = np.pad(x_1, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3) - # rinput2 = np.pad(x_2, tuple([pad_size for _ in range(4)]), mode='constant').transpose(1, 2).transpose(2, 3) - - rinput1 = np.pad(x_1, ((0, 0), (0, 0), (pad_size, pad_size), (pad_size, 
pad_size)), mode='constant') - rinput2 = np.pad(x_2, ((0, 0), (0, 0), (pad_size, pad_size), (pad_size, pad_size)), mode='constant') - rinput1 = np.transpose(rinput1, (0, 2, 3, 1)) - rinput2 = np.transpose(rinput2, (0, 2, 3, 1)) - B = int(rinput1.shape[0]) - H = int(x_1.shape[2]) - W = int(x_2.shape[3]) - d = max_displacement - D = 2 * d + 1 - output = np.zeros((B, D * D, H, W), dtype=np.float32) - - for b in range(B): - for i in range(H): - for j in range(W): - for k in range(-d, d + 1): - for l in range(-d, d + 1): - x1_index = i + pad_size - y1_index = j + pad_size - x2_index = x1_index + k - y2_index = y1_index + l - output[b, l + d + D * (k + d), i, j] = np.mean( - rinput1[b, x1_index:x1_index + K, y1_index:y1_index + K] * rinput2[b, - x2_index:x2_index + K, - y2_index:y2_index + K]) - - return output - -class TestCorrelationOp(unittest.TestCase): - def test_check_output(self): - #x_shape = (1, 196, 3, 3) - np.random.seed(13) - np.set_printoptions(threshold=np.inf) - x_shape = (2, 10, 3, 3) - x_type = 'float32' - x1 = fluid.layers.data(name='x1', shape=x_shape, dtype=x_type, append_batch_size=False) - x2 = fluid.layers.data(name='x2', shape=x_shape, dtype=x_type, append_batch_size=False) - - x1_np = np.random.randn(2,3,4,5).astype(x_type) - x2_np = np.random.randn(2,3,4,5).astype(x_type) - out_np = corr(x1_np, x2_np, pad_size=4, kernel_size=1, max_displacement=4, stride1=1, stride2=1) - - out = correlation(x1, x2, pad_size=4, kernel_size=1, max_displacement=4, stride1=1, stride2=1) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - res = exe.run(feed={'x1':x1_np, 'x2':x2_np}, fetch_list=[out.name]) - - self.assertTrue(np.allclose(res[0], out_np)) - -class Net(fluid.dygraph.Layer): - def __init__(self, name_scope): - super(Net, self).__init__(name_scope) - def forward(self, x1, x2): - y = correlation(x1, x2, pad_size=4, kernel_size=1, max_displacement=4, stride1=1, stride2=1) - return y - -class TestCorrelationOpDyGraph(unittest.TestCase): - def 
test_check_output(self): - np.random.seed(13) - np.set_printoptions(threshold=np.inf) - x_shape = (2, 10, 3, 3) - x_type = 'float32' - place = fluid.CUDAPlace(0) - with fluid.dygraph.guard(place): - x1_np = np.random.randn(2,3,4,5).astype(x_type) - x2_np = np.random.randn(2,3,4,5).astype(x_type) - out_np = corr(x1_np, x2_np, pad_size=4, kernel_size=1, max_displacement=4, stride1=1, stride2=1) - - x1 = to_variable(x1_np) - x2 = to_variable(x2_np) - corr_pd = Net('corr_pd') - y = corr_pd(x1, x2) - out = y.numpy() - self.assertTrue(np.allclose(out, out_np)) - -if __name__ == '__main__': - unittest.main() diff --git a/PaddleCV/Research/PWCNet/data/__init__.py b/PaddleCV/Research/PWCNet/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/PWCNet/data/datasets.py b/PaddleCV/Research/PWCNet/data/datasets.py deleted file mode 100644 index 080e875df614c6ad8499822b492c85555321b338..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/data/datasets.py +++ /dev/null @@ -1,475 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# @FileName: datasets.py reference https://github.com/NVIDIA/flownet2-pytorch/blob/master/datasets.py -import paddle -import paddle.fluid as fluid -import numpy as np -import argparse -import os, math, random -import sys -from os.path import * -import numpy as np -from glob import glob -sys.path.append('../') -import data.utils.frame_utils as frame_utils -from scipy.misc import imsave -from src import flow_vis -from src.read_files import read_txt_to_index - - -class StaticRandomCrop(object): - def __init__(self, image_size, crop_size): - self.th, self.tw = crop_size - h, w = image_size - self.h1 = random.randint(0, h - self.th) - self.w1 = random.randint(0, w - self.tw) - - def __call__(self, img): - return img[self.h1:(self.h1 + self.th), self.w1:(self.w1 + self.tw), :] - - -class StaticCenterCrop(object): - def __init__(self, image_size, crop_size): - self.th, self.tw = crop_size - self.h, self.w = image_size - - def __call__(self, img): - return img[(self.h - self.th) // 2:(self.h + self.th) // 2, (self.w - self.tw) // 2:(self.w + self.tw) // 2, :] - - -class MpiSintel(object): - def __init__(self, args, is_cropped=False, root='', dstype='clean', replicates=1): - self.args = args - self.is_cropped = is_cropped - self.crop_size = args.crop_size - self.render_size = args.inference_size - self.replicates = replicates - - flow_root = join(root, 'flow') - image_root = join(root, dstype) - - file_list = sorted(glob(join(flow_root, '*/*.flo'))) - - self.flow_list = [] - self.image_list = [] - - for file in file_list: - if 'test' in file: - # print file - continue - - fbase = file[len(flow_root) + 1:] - fprefix = fbase[:-8] - fnum = int(fbase[-8:-4]) - - img1 = join(image_root, fprefix + "%04d" % (fnum + 0) + '.png') - img2 = join(image_root, fprefix + "%04d" % (fnum + 1) + '.png') - - if not isfile(img1) or not isfile(img2) or not isfile(file): - continue - - self.image_list += [[img1, img2]] - self.flow_list += [file] - - self.size = len(self.image_list) - - 
self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape - - if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or ( - self.frame_size[1] % 64): - self.render_size[0] = ((self.frame_size[0]) // 64) * 64 - self.render_size[1] = ((self.frame_size[1]) // 64) * 64 - - args.inference_size = self.render_size - - assert (len(self.image_list) == len(self.flow_list)) - - def __getitem__(self, index): - - index = index % self.size - - img1 = frame_utils.read_gen(self.image_list[index][0]) - img2 = frame_utils.read_gen(self.image_list[index][1]) - - flow = frame_utils.read_gen(self.flow_list[index]) - - images = [img1, img2] - image_size = img1.shape[:2] - - if self.is_cropped: - cropper = StaticRandomCrop(image_size, self.crop_size) - else: - cropper = StaticCenterCrop(image_size, self.render_size) - images = list(map(cropper, images)) - flow = cropper(flow) - - images = np.array(images).transpose(3, 0, 1, 2) - flow = flow.transpose(2, 0, 1) - return [images], [flow] - - def __len__(self): - return self.size * self.replicates - - -class MpiSintelClean(MpiSintel): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(MpiSintelClean, self).__init__(args, is_cropped=is_cropped, root=root, dstype='clean', - replicates=replicates) - - -class MpiSintelFinal(MpiSintel): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(MpiSintelFinal, self).__init__(args, is_cropped=is_cropped, root=root, dstype='final', - replicates=replicates) - - -class FlyingChairs(object): - def __init__(self, train_val, args, is_cropped, txt_file, root='/path/to/FlyingChairs_release/data', replicates=1): - self.args = args - self.is_cropped = is_cropped - self.crop_size = args.crop_size - self.render_size = args.inference_size - self.replicates = replicates - - images = sorted(glob(join(root, '*.ppm'))) - - flow_list = sorted(glob(join(root, '*.flo'))) - - assert (len(images) // 2 == len(flow_list)) - - image_list 
= [] - for i in range(len(flow_list)): - im1 = images[2 * i] - im2 = images[2 * i + 1] - image_list += [[im1, im2]] - - assert len(image_list) == len(flow_list) - if train_val == 'train': - intindex = np.array(read_txt_to_index(txt_file)) - image_list = np.array(image_list) - image_list = image_list[intindex == 1] - image_list = image_list.tolist() - flow_list = np.array(flow_list) - flow_list = flow_list[intindex == 1] - flow_list = flow_list.tolist() - assert len(image_list) == len(flow_list) - elif train_val == 'val': - intindex = np.array(read_txt_to_index(txt_file)) - image_list = np.array(image_list) - image_list = image_list[intindex == 2] - image_list = image_list.tolist() - flow_list = np.array(flow_list) - flow_list = flow_list[intindex == 2] - flow_list = flow_list.tolist() - assert len(image_list) == len(flow_list) - else: - raise ValueError('FlyingChairs_train_val.txt not found for txt_file ......') - self.flow_list = flow_list - self.image_list = image_list - - self.size = len(self.image_list) - - self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape - - if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or ( - self.frame_size[1] % 64): - self.render_size[0] = ((self.frame_size[0]) // 64) * 64 - self.render_size[1] = ((self.frame_size[1]) // 64) * 64 - - args.inference_size = self.render_size - - def __getitem__(self, index): - index = index % self.size - - img1 = frame_utils.read_gen(self.image_list[index][0]) - img2 = frame_utils.read_gen(self.image_list[index][1]) - - flow = frame_utils.read_gen(self.flow_list[index]) - - images = [img1, img2] - image_size = img1.shape[:2] - if self.is_cropped: - cropper = StaticRandomCrop(image_size, self.crop_size) - else: - cropper = StaticCenterCrop(image_size, self.render_size) - images = list(map(cropper, images)) - flow = cropper(flow) - - images = np.array(images).transpose(3, 0, 1, 2) - flow = flow.transpose(2, 0, 1) - return [images], [flow] - - def 
__len__(self): - return self.size * self.replicates - - -def reader_flyingchairs(dataset): - n = len(dataset) - - def reader(): - for i in range(n): - a, b = dataset[i] - yield a[0][:,0,:,:].transpose(1,2,0), a[0][:,1,:,:].transpose(1,2,0), b[0].transpose(1, 2, 0)# a single entry of data is created each time - return reader - - -class FlyingThings(object): - def __init__(self, args, is_cropped, root='/path/to/flyingthings3d', dstype='frames_cleanpass', replicates=1): - self.args = args - self.is_cropped = is_cropped - self.crop_size = args.crop_size - self.render_size = args.inference_size - self.replicates = replicates - - image_dirs = sorted(glob(join(root, dstype, 'TRAIN/*/*'))) - image_dirs = sorted([join(f, 'left') for f in image_dirs] + [join(f, 'right') for f in image_dirs]) - - flow_dirs = sorted(glob(join(root, 'optical_flow_flo_format/TRAIN/*/*'))) - flow_dirs = sorted( - [join(f, 'into_future/left') for f in flow_dirs] + [join(f, 'into_future/right') for f in flow_dirs]) - - assert (len(image_dirs) == len(flow_dirs)) - - self.image_list = [] - self.flow_list = [] - - for idir, fdir in zip(image_dirs, flow_dirs): - images = sorted(glob(join(idir, '*.png'))) - flows = sorted(glob(join(fdir, '*.flo'))) - for i in range(len(flows)): - self.image_list += [[images[i], images[i + 1]]] - self.flow_list += [flows[i]] - - assert len(self.image_list) == len(self.flow_list) - - self.size = len(self.image_list) - - self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape - - if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or ( - self.frame_size[1] % 64): - self.render_size[0] = ((self.frame_size[0]) // 64) * 64 - self.render_size[1] = ((self.frame_size[1]) // 64) * 64 - - args.inference_size = self.render_size - - def __getitem__(self, index): - index = index % self.size - - img1 = frame_utils.read_gen(self.image_list[index][0]) - img2 = frame_utils.read_gen(self.image_list[index][1]) - - flow = 
frame_utils.read_gen(self.flow_list[index]) - - images = [img1, img2] - image_size = img1.shape[:2] - if self.is_cropped: - cropper = StaticRandomCrop(image_size, self.crop_size) - else: - cropper = StaticCenterCrop(image_size, self.render_size) - images = list(map(cropper, images)) - flow = cropper(flow) - - images = np.array(images).transpose(3, 0, 1, 2) - flow = flow.transpose(2, 0, 1) - return [images], [flow] - - def __len__(self): - return self.size * self.replicates - - -class FlyingThingsClean(FlyingThings): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(FlyingThingsClean, self).__init__(args, is_cropped=is_cropped, root=root, dstype='frames_cleanpass', - replicates=replicates) - - -class FlyingThingsFinal(FlyingThings): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(FlyingThingsFinal, self).__init__(args, is_cropped=is_cropped, root=root, dstype='frames_finalpass', - replicates=replicates) - - -class ChairsSDHom(object): - def __init__(self, args, is_cropped, root='/path/to/chairssdhom/data', dstype='train', replicates=1): - self.args = args - self.is_cropped = is_cropped - self.crop_size = args.crop_size - self.render_size = args.inference_size - self.replicates = replicates - - image1 = sorted(glob(join(root, dstype, 't0/*.png'))) - image2 = sorted(glob(join(root, dstype, 't1/*.png'))) - self.flow_list = sorted(glob(join(root, dstype, 'flow/*.flo'))) - - assert (len(image1) == len(self.flow_list)) - - self.image_list = [] - for i in range(len(self.flow_list)): - im1 = image1[i] - im2 = image2[i] - self.image_list += [[im1, im2]] - - assert len(self.image_list) == len(self.flow_list) - - self.size = len(self.image_list) - - self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape - - if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or ( - self.frame_size[1] % 64): - self.render_size[0] = ((self.frame_size[0]) // 64) * 64 - self.render_size[1] = 
((self.frame_size[1]) // 64) * 64 - - args.inference_size = self.render_size - - def __getitem__(self, index): - index = index % self.size - - img1 = frame_utils.read_gen(self.image_list[index][0]) - img2 = frame_utils.read_gen(self.image_list[index][1]) - - flow = frame_utils.read_gen(self.flow_list[index]) - flow = flow[::-1, :, :] - - images = [img1, img2] - image_size = img1.shape[:2] - if self.is_cropped: - cropper = StaticRandomCrop(image_size, self.crop_size) - else: - cropper = StaticCenterCrop(image_size, self.render_size) - images = list(map(cropper, images)) - flow = cropper(flow) - - images = np.array(images).transpose(3, 0, 1, 2) - flow = flow.transpose(2, 0, 1) - return [images], [flow] - - def __len__(self): - return self.size * self.replicates - - -class ChairsSDHomTrain(ChairsSDHom): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(ChairsSDHomTrain, self).__init__(args, is_cropped=is_cropped, root=root, dstype='train', - replicates=replicates) - - -class ChairsSDHomTest(ChairsSDHom): - def __init__(self, args, is_cropped=False, root='', replicates=1): - super(ChairsSDHomTest, self).__init__(args, is_cropped=is_cropped, root=root, dstype='test', - replicates=replicates) - - -class ImagesFromFolder(object): - def __init__(self, args, is_cropped, root='/path/to/frames/only/folder', iext='png', replicates=1): - self.args = args - self.is_cropped = is_cropped - self.crop_size = args.crop_size - self.render_size = args.inference_size - self.replicates = replicates - - images = sorted(glob(join(root, '*.' 
+ iext))) - self.image_list = [] - for i in range(len(images) - 1): - im1 = images[i] - im2 = images[i + 1] - self.image_list += [[im1, im2]] - - self.size = len(self.image_list) - - self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape - - if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0] % 64) or ( - self.frame_size[1] % 64): - self.render_size[0] = ((self.frame_size[0]) // 64) * 64 - self.render_size[1] = ((self.frame_size[1]) // 64) * 64 - - args.inference_size = self.render_size - - def __getitem__(self, index): - index = index % self.size - - img1 = frame_utils.read_gen(self.image_list[index][0]) - img2 = frame_utils.read_gen(self.image_list[index][1]) - - images = [img1, img2] - image_size = img1.shape[:2] - if self.is_cropped: - cropper = StaticRandomCrop(image_size, self.crop_size) - else: - cropper = StaticCenterCrop(image_size, self.render_size) - images = list(map(cropper, images)) - - images = np.array(images).transpose(3, 0, 1, 2) - return [images], [np.zeros(images.size()[0:1] + (2,) + images.size()[-2:])] - - def __len__(self): - return self.size * self.replicates - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - args = parser.parse_args() - args.inference_size = [1080, 1920] - args.crop_size = [384, 512] - - index = 50 - flyingchairs_dataset = FlyingChairs(args, True, root='/ssd2/zhenghe/DATA/FlyingChairs_release/data') - # a, b = flyingchairs_dataset[index] - # im1 = a[0][:,0,:,:].transpose(1,2,0) - # im2 = a[0][:,1,:,:].transpose(1,2,0) - # flo = b[0].transpose(1, 2, 0) / 20.0 - # flow_color = flow_vis.flow_to_color(flo, convert_to_bgr=False) - # imsave('./hsv_pd.png', flow_color) - sample_num = len(flyingchairs_dataset) - reader = reader_flyingchairs(flyingchairs_dataset) - BATCH_SIZE = 8 - train_batch_reader = paddle.batch(reader, BATCH_SIZE, drop_last=True) - epoch_num = 1 - - with fluid.dygraph.guard(): - for epoch in range(epoch_num): - for batch_id, data in 
enumerate(train_batch_reader()): - im1_data = np.array( - [x[0] for x in data]).astype('float32') - im2_data = np.array( - [x[1] for x in data]).astype('float32') - flo_data = np.array( - [x[2] for x in data]).astype('float32') - if batch_id % 500 == 0: - # if batch_id < 10: - print(batch_id) - print(im1_data.shape) - print(im2_data.shape) - print(flo_data.shape) - im1 = im1_data[0, :, :, :] - im2 = im2_data[0, :, :, :] - flo = flo_data[0, :, :, :] - print(im1.shape) - print(im2.shape) - print(flo.shape) - imsave('./img1.png', im1) - imsave('./img2.png', im2) - flow_color = flow_vis.flow_to_color(flo, convert_to_bgr=False) - imsave('./hsv_pd.png', flow_color) - print("batch_id:", batch_id) - print(batch_id * BATCH_SIZE) - print(sample_num) - # img = fluid.dygraph.to_variable(dy_x_data) - - - - - diff --git a/PaddleCV/Research/PWCNet/data/download.sh b/PaddleCV/Research/PWCNet/data/download.sh deleted file mode 100755 index 8a0c5dad4d5fb233be56050983bf1f0b293944d0..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/data/download.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -#mkdir FlyingThings3D_release -#cd FlyingThings3D_release -# -#wget http://lmb.informatik.uni-freiburg.de/data/SceneFlowDatasets_CVPR16/Release_april16/data/FlyingThings3D/raw_data/flyingthings3d__frames_cleanpass.tar -#wget http://lmb.informatik.uni-freiburg.de/data/SceneFlowDatasets_CVPR16/Release_april16/data/FlyingThings3D/derived_data/flyingthings3d__optical_flow.tar.bz2 -# -#tar xvf flyingthings3d__frames_cleanpass.tar -#tar xvf flyingthings3d__optical_flow.tar.bz2 -# -#cd .. 
-wget http://lmb.informatik.uni-freiburg.de/resources/datasets/FlyingChairs/FlyingChairs.zip -unzip FlyingChairs.zip - -#wget https://lmb.informatik.uni-freiburg.de/data/FlowNet2/ChairsSDHom/ChairsSDHom.tar.gz -#tar xvzf ChairsSDHom.tar.gz diff --git a/PaddleCV/Research/PWCNet/data/frame_0010.png b/PaddleCV/Research/PWCNet/data/frame_0010.png deleted file mode 100644 index 80df246723859bb1e0aaca2f41944537cdc18d70..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/data/frame_0010.png and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/data/frame_0011.png b/PaddleCV/Research/PWCNet/data/frame_0011.png deleted file mode 100644 index 0ee97e97a7eba203eb6f67f032f81a8fbdb2c3ed..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/data/frame_0011.png and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/data/utils/__init__.py b/PaddleCV/Research/PWCNet/data/utils/__init__.py deleted file mode 100644 index 139597f9cb07c5d48bed18984ec4747f4b4f3438..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/data/utils/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/PaddleCV/Research/PWCNet/data/utils/flow_utils.py b/PaddleCV/Research/PWCNet/data/utils/flow_utils.py deleted file mode 100644 index 4ee0ecbb16a92bb9f738d278b61a18862ad518a5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/data/utils/flow_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np - -TAG_CHAR = np.array([202021.25], np.float32) - - -def readFlow(fn): - """ Read .flo file in Middlebury format""" - # Code adapted from: - # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy - - # WARNING: this will work on little-endian architectures (eg Intel x86) only! - # print 'fn = %s'%(fn) - with open(fn, 'rb') as f: - magic = np.fromfile(f, np.float32, count=1) - if 202021.25 != magic: - print('Magic number incorrect. 
Invalid .flo file') - return None - else: - w = np.fromfile(f, np.int32, count=1) - h = np.fromfile(f, np.int32, count=1) - # print 'Reading %d x %d flo file\n' % (w, h) - data = np.fromfile(f, np.float32, count=2 * int(w) * int(h)) - # Reshape data into 3D array (columns, rows, bands) - # The reshape here is for visualization, the original code is (w,h,2) - return np.resize(data, (int(h), int(w), 2)) - - -def writeFlow(filename, uv, v=None): - """ Write optical flow to file. - - If v is None, uv is assumed to contain both u and v channels, - stacked in depth. - Original code by Deqing Sun, adapted from Daniel Scharstein. - """ - nBands = 2 - - if v is None: - assert (uv.ndim == 3) - assert (uv.shape[2] == 2) - u = uv[:, :, 0] - v = uv[:, :, 1] - else: - u = uv - - assert (u.shape == v.shape) - height, width = u.shape - f = open(filename, 'wb') - # write the header - f.write(TAG_CHAR) - np.array(width).astype(np.int32).tofile(f) - np.array(height).astype(np.int32).tofile(f) - # arrange into matrix form - tmp = np.zeros((height, width * nBands)) - tmp[:, np.arange(width) * 2] = u - tmp[:, np.arange(width) * 2 + 1] = v - tmp.astype(np.float32).tofile(f) - f.close() \ No newline at end of file diff --git a/PaddleCV/Research/PWCNet/data/utils/frame_utils.py b/PaddleCV/Research/PWCNet/data/utils/frame_utils.py deleted file mode 100644 index 40a8ea5a206aec428241ac7674de83a1a4099de0..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/data/utils/frame_utils.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -from os.path import * -from scipy.misc import imread -from . 
import flow_utils - -def read_gen(file_name): - ext = splitext(file_name)[-1] - if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg': - im = imread(file_name) - if im.shape[2] > 3: - return im[:,:,:3] - else: - return im - elif ext == '.bin' or ext == '.raw': - return np.load(file_name) - elif ext == '.flo': - return flow_utils.readFlow(file_name).astype(np.float32) - return [] \ No newline at end of file diff --git a/PaddleCV/Research/PWCNet/finetune.sh b/PaddleCV/Research/PWCNet/finetune.sh deleted file mode 100755 index 29d2e802da3cc3fa13413ab768071e19d59e3147..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/finetune.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -python3 train.py --loss l1 --pretrained ./out/pwc_net_paddle --dataset FlyingChairs --train_val_txt data_dir/FlyingChairs_release/FlyingChairs_train_val.txt --data_root data_dir/FlyingChairs_release/data - -# use multi gpus NEED TO DO LATER -#python3 -m paddle.distributed.launch --selected_gpus=0,1 train.py --use_multi_gpu --batch_size 40 --loss l1 --pretrained ./out/pwc_net_paddle --dataset FlyingChairs --train_val_txt data_dir/FlyingChairs_release/FlyingChairs_train_val.txt --data_root data_dir/FlyingChairs_release/data diff --git a/PaddleCV/Research/PWCNet/infer.py b/PaddleCV/Research/PWCNet/infer.py deleted file mode 100644 index 717c18f02c017e910b4a86e09616386668822e8a..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/infer.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Infer for PWCNet.""" -import sys -import pickle -import time -import cv2 -import numpy as np -from math import ceil -from scipy.ndimage import imread -from scipy.misc import imsave -import paddle.fluid as fluid -from models.model import PWCDCNet -from src import flow_vis - - - -def writeFlowFile(filename, uv): - """ - According to the matlab code of Deqing Sun and c++ source code of Daniel Scharstein - Contact: dqsun@cs.brown.edu - Contact: schar@middlebury.edu - """ - TAG_STRING = np.array(202021.25, dtype=np.float32) - if uv.shape[2] != 2: - sys.exit("writeFlowFile: flow must have two bands!"); - H = np.array(uv.shape[0], dtype=np.int32) - W = np.array(uv.shape[1], dtype=np.int32) - with open(filename, 'wb') as f: - f.write(TAG_STRING.tobytes()) - f.write(W.tobytes()) - f.write(H.tobytes()) - f.write(uv.tobytes()) - - -def load_dict(filename_): - with open(filename_, 'rb') as f: - ret_di = pickle.load(f) - return ret_di - - -def pad_input(x0): - intWidth = x0.shape[2] - intHeight = x0.shape[3] - if intWidth != ((intWidth >> 6) << 6): - intWidth_pad = (((intWidth >> 6) + 1) << 6) # more than necessary - intPaddingLeft = int((intWidth_pad - intWidth) / 2) - intPaddingRight = intWidth_pad - intWidth - intPaddingLeft - else: - intWidth_pad = intWidth - intPaddingLeft = 0 - intPaddingRight = 0 - - if intHeight != ((intHeight >> 6) << 6): - intHeight_pad = (((intHeight >> 6) + 1) << 6) # more than necessary - intPaddingTop = int((intHeight_pad - intHeight) / 2) - intPaddingBottom = intHeight_pad - intHeight - intPaddingTop - else: - intHeight_pad = 
intHeight - intPaddingTop = 0 - intPaddingBottom = 0 - - out = fluid.layers.pad2d(input=x0, - paddings=[intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom], - mode='edge') - - return out, [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom, intWidth, intHeight] - - -def main(): - im1_fn = 'data/frame_0010.png' - im2_fn = 'data/frame_0011.png' - flow_fn = './tmp/frame_0010_pd.flo' - if len(sys.argv) > 1: - im1_fn = sys.argv[1] - if len(sys.argv) > 2: - im2_fn = sys.argv[2] - if len(sys.argv) > 3: - flow_fn = sys.argv[3] - - im_all = [imread(img) for img in [im1_fn, im2_fn]] - im_all = [im[:, :, :3] for im in im_all] - - # rescale the image size to be multiples of 64 - divisor = 64. - H = im_all[0].shape[0] - W = im_all[0].shape[1] - print('origin shape : ', H, W) - - H_ = int(ceil(H / divisor) * divisor) - W_ = int(ceil(W / divisor) * divisor) - print('resize shape: ', H_, W_) - for i in range(len(im_all)): - im_all[i] = cv2.resize(im_all[i], (W_, H_)) - - for _i, _inputs in enumerate(im_all): - im_all[_i] = im_all[_i][:, :, ::-1] - im_all[_i] = 1.0 * im_all[_i] / 255.0 - im_all[_i] = np.transpose(im_all[_i], (2, 0, 1)) - im_all = np.concatenate((im_all[0], im_all[1]), axis=0).astype(np.float32) - im_all = im_all[np.newaxis, :, :, :] - - with fluid.dygraph.guard(place=fluid.CUDAPlace(0)): - im_all = fluid.dygraph.to_variable(im_all) - im_all, [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom, intWidth, intHeight] = pad_input( - im_all) - - model = PWCDCNet("pwcnet") - model.eval() - pd_pretrain, _ = fluid.dygraph.load_dygraph("paddle_model/pwc_net_paddle") - model.set_dict(pd_pretrain) - start = time.time() - flo = model(im_all) - end = time.time() - print('Time of PWCNet model for one infer step: ', end - start) - flo = flo[0].numpy() * 20.0 - # scale the flow back to the input size - flo = np.swapaxes(np.swapaxes(flo, 0, 1), 1, 2) - flo = flo[intPaddingTop * 2:intPaddingTop * 2 + intHeight * 2, - intPaddingLeft * 2: 
intPaddingLeft * 2 + intWidth * 2, :] - u_ = cv2.resize(flo[:, :, 0], (W, H)) - v_ = cv2.resize(flo[:, :, 1], (W, H)) - u_ *= W / float(W_) - v_ *= H / float(H_) - flo = np.dstack((u_, v_)) - - # # Apply the coloring (for OpenCV, set convert_to_bgr=True) - flow_color = flow_vis.flow_to_color(flo, convert_to_bgr=False) - imsave('./tmp/hsv_pd.png', flow_color) - - writeFlowFile(flow_fn, flo) - - -if __name__ == '__main__': - main() - - diff --git a/PaddleCV/Research/PWCNet/models/__init__.py b/PaddleCV/Research/PWCNet/models/__init__.py deleted file mode 100644 index 44a41a91f24512697caec6068c7ce1f4101c93b5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/models/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import models.model diff --git a/PaddleCV/Research/PWCNet/models/model.py b/PaddleCV/Research/PWCNet/models/model.py deleted file mode 100644 index 435e9f4dbc375251468906ca0f33ac3c79701804..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/models/model.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle.fluid as fluid -from paddle.fluid.dygraph import Conv2D, Conv2DTranspose -from correlation_op.correlation import correlation - - -class PWCDCNet(fluid.dygraph.Layer): - def __init__(self, name_scope, md=4): - super(PWCDCNet, self).__init__(name_scope) - self.param_attr = fluid.ParamAttr( - name='conv_weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0004), - initializer=fluid.initializer.MSRAInitializer(uniform=True, fan_in=None, seed=0)) - self.md = md - self.conv1a = Conv2D("conv1a", 16, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv1aa = Conv2D("conv1aa", 16, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv1b = Conv2D("conv1b", 16, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2a = Conv2D("conv2a", 32, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv2aa = Conv2D("conv2aa", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2b = Conv2D("conv2b", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv3a = Conv2D("conv3a", 64, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv3aa = Conv2D("conv3aa", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv3b = Conv2D("conv3b", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv4a = Conv2D("conv4a", 96, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv4aa = Conv2D("conv4aa", 96, filter_size=3, 
stride=1, padding=1, param_attr=self.param_attr) - self.conv4b = Conv2D("conv4b", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv5a = Conv2D("conv5a", 128, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv5aa = Conv2D("conv5aa", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv5b = Conv2D("conv5b", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6aa = Conv2D("conv6aa", 196, filter_size=3, stride=2, padding=1, param_attr=self.param_attr) - self.conv6a = Conv2D("conv6a", 196, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6b = Conv2D("conv6b", 196, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - - self.conv6_0 = Conv2D("conv6_0", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6_1 = Conv2D("conv6_1", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6_2 = Conv2D("conv6_2", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6_3 = Conv2D("conv6_3", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv6_4 = Conv2D("conv6_4", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.predict_flow6 = Conv2D("predict_flow6", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - self.deconv6 = Conv2DTranspose("deconv6", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - self.upfeat6 = Conv2DTranspose("upfeat6", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - - self.conv5_0 = Conv2D("conv5_0", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv5_1 = Conv2D("conv5_1", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv5_2 = Conv2D("conv5_2", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv5_3 = Conv2D("conv5_3", 64, filter_size=3, stride=1, 
padding=1, param_attr=self.param_attr) - self.conv5_4 = Conv2D("conv5_4", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.predict_flow5 = Conv2D("predict_flow5", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - self.deconv5 = Conv2DTranspose("deconv5", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - self.upfeat5 = Conv2DTranspose("upfeat5", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - - self.conv4_0 = Conv2D("conv4_0", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv4_1 = Conv2D("conv4_1", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv4_2 = Conv2D("conv4_2", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv4_3 = Conv2D("conv4_3", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv4_4 = Conv2D("conv4_4", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.predict_flow4 = Conv2D("predict_flow4", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - self.deconv4 = Conv2DTranspose("deconv4", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - self.upfeat4 = Conv2DTranspose("upfeat4", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - - self.conv3_0 = Conv2D("conv3_0", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv3_1 = Conv2D("conv3_1", 128, filter_size=3, stride=1, padding=1 ,param_attr=self.param_attr) - self.conv3_2 = Conv2D("conv3_2", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv3_3 = Conv2D("conv3_3", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv3_4 = Conv2D("conv3_4", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.predict_flow3 = Conv2D("predict_flow3", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - self.deconv3 = 
Conv2DTranspose("deconv3", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - self.upfeat3 = Conv2DTranspose("upfeat3", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - - self.conv2_0 = Conv2D("conv2_0", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2_1 = Conv2D("conv2_1", 128, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2_2 = Conv2D("conv2_2", 96, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2_3 = Conv2D("conv2_3", 64, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.conv2_4 = Conv2D("conv2_4", 32, filter_size=3, stride=1, padding=1, param_attr=self.param_attr) - self.predict_flow2 = Conv2D("predict_flow2", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - self.deconv2 = Conv2DTranspose("deconv2", 2, filter_size=4, stride=2, padding=1, param_attr=self.param_attr) - - self.dc_conv1 = Conv2D("dc_conv1", 128, filter_size=3, stride=1, padding=1, dilation=1, param_attr=self.param_attr) - self.dc_conv2 = Conv2D("dc_conv2", 128, filter_size=3, stride=1, padding=2, dilation=2, param_attr=self.param_attr) - self.dc_conv3 = Conv2D("dc_conv3", 128, filter_size=3, stride=1, padding=4, dilation=4, param_attr=self.param_attr) - self.dc_conv4 = Conv2D("dc_conv4", 96, filter_size=3, stride=1, padding=8, dilation=8, param_attr=self.param_attr) - self.dc_conv5 = Conv2D("dc_conv5", 64, filter_size=3, stride=1, padding=16, dilation=16, param_attr=self.param_attr) - self.dc_conv6 = Conv2D("dc_conv6", 32, filter_size=3, stride=1, padding=1, dilation=1, param_attr=self.param_attr) - self.dc_conv7 = Conv2D("dc_conv7", 2, filter_size=3,stride=1,padding=1, param_attr=self.param_attr) - - def warp(self, x, flo): - """ - warp an image/tensor (im2) back to im1, according to the optical flow - - x: [B, C, H, W] (im2) - flo: [B, 2, H, W] flow - - """ - - B, C, H, W = x.shape - # mesh grid - xx_pd = fluid.layers.range(0, W, 1, 
'float32') - xx_pd = fluid.layers.reshape(xx_pd, shape=[1, -1]) - xx_pd = fluid.layers.expand(x=xx_pd, expand_times=[H, 1]) - xx_pd = fluid.layers.reshape(xx_pd, shape=[1, 1, H, W]) - xx_pd = fluid.layers.expand(x=xx_pd, expand_times=[B, 1, 1, 1]) - - yy_pd = fluid.layers.range(0, H, 1, 'float32') - yy_pd = fluid.layers.reshape(yy_pd, shape=[-1, 1]) - yy_pd = fluid.layers.expand(x=yy_pd, expand_times=[1, W]) - yy_pd = fluid.layers.reshape(x=yy_pd, shape=[1, 1, H, W]) - yy_pd = fluid.layers.expand(x=yy_pd, expand_times=[B, 1, 1, 1]) - grid_pd = fluid.layers.concat(input=[xx_pd, yy_pd], axis=1) - flo_pd = flo - vgrid_pd = fluid.layers.elementwise_add(grid_pd, flo_pd) - vgrid_pd_0 = 2.0 * fluid.layers.slice(vgrid_pd, axes=[1], starts=[0], ends=[1]) / max(W - 1, 1) - 1.0 - vgrid_pd_1 = 2.0 * fluid.layers.slice(vgrid_pd, axes=[1], starts=[1], ends=[2]) / max(H - 1, 1) - 1.0 - vgrid_pd = fluid.layers.concat(input=[vgrid_pd_0, vgrid_pd_1], axis=1) - vgrid_pd = fluid.layers.transpose(vgrid_pd, [0, 2, 3, 1]) - output = fluid.layers.grid_sampler(name='grid_sample', x=x, grid=vgrid_pd) - - mask = fluid.layers.zeros_like(x) - mask = mask + 1.0 - mask = fluid.layers.grid_sampler(name='grid_sample', x=mask, grid=vgrid_pd) - mask_temp1 = fluid.layers.cast(mask < 0.9990, 'float32') - mask = mask * (1 - mask_temp1) - mask = fluid.layers.cast(mask > 0, 'float32') - outwarp = fluid.layers.elementwise_mul(output, mask) - - return outwarp - - def corr(self, x_1, x_2): - out = correlation(x_1, x_2, pad_size=self.md, kernel_size=1, max_displacement=self.md, - stride1=1, stride2=1, corr_type_multiply=1) - return out - - def forward(self, x, output_more=False): - im1 = fluid.layers.slice(x, axes=[1], starts=[0], ends=[3]) - im2 = fluid.layers.slice(x, axes=[1], starts=[3], ends=[6]) - # print("\n\n***************************PWC Net details *************** \n\n") - c11 = fluid.layers.leaky_relu(self.conv1a(im1), 0.1) - c11 = fluid.layers.leaky_relu(self.conv1aa(c11), 0.1) - c11 = 
fluid.layers.leaky_relu(self.conv1b(c11), 0.1) - - c21 = fluid.layers.leaky_relu(self.conv1a(im2), 0.1) - c21 = fluid.layers.leaky_relu(self.conv1aa(c21), 0.1) - c21 = fluid.layers.leaky_relu(self.conv1b(c21), 0.1) - - c12 = fluid.layers.leaky_relu(self.conv2a(c11), 0.1) - c12 = fluid.layers.leaky_relu(self.conv2aa(c12), 0.1) - c12 = fluid.layers.leaky_relu(self.conv2b(c12), 0.1) - - c22 = fluid.layers.leaky_relu(self.conv2a(c21), 0.1) - c22 = fluid.layers.leaky_relu(self.conv2aa(c22), 0.1) - c22 = fluid.layers.leaky_relu(self.conv2b(c22), 0.1) - - c13 = fluid.layers.leaky_relu(self.conv3a(c12), 0.1) - c13 = fluid.layers.leaky_relu(self.conv3aa(c13), 0.1) - c13 = fluid.layers.leaky_relu(self.conv3b(c13), 0.1) - - c23 = fluid.layers.leaky_relu(self.conv3a(c22), 0.1) - c23 = fluid.layers.leaky_relu(self.conv3aa(c23), 0.1) - c23 = fluid.layers.leaky_relu(self.conv3b(c23), 0.1) - - c14 = fluid.layers.leaky_relu(self.conv4a(c13), 0.1) - c14 = fluid.layers.leaky_relu(self.conv4aa(c14), 0.1) - c14 = fluid.layers.leaky_relu(self.conv4b(c14), 0.1) - - c24 = fluid.layers.leaky_relu(self.conv4a(c23), 0.1) - c24 = fluid.layers.leaky_relu(self.conv4aa(c24), 0.1) - c24 = fluid.layers.leaky_relu(self.conv4b(c24), 0.1) - - c15 = fluid.layers.leaky_relu(self.conv5a(c14), 0.1) - c15 = fluid.layers.leaky_relu(self.conv5aa(c15), 0.1) - c15 = fluid.layers.leaky_relu(self.conv5b(c15), 0.1) - - c25 = fluid.layers.leaky_relu(self.conv5a(c24), 0.1) - c25 = fluid.layers.leaky_relu(self.conv5aa(c25), 0.1) - c25 = fluid.layers.leaky_relu(self.conv5b(c25), 0.1) - - c16 = fluid.layers.leaky_relu(self.conv6aa(c15), 0.1) - c16 = fluid.layers.leaky_relu(self.conv6a(c16), 0.1) - c16 = fluid.layers.leaky_relu(self.conv6b(c16), 0.1) - - c26 = fluid.layers.leaky_relu(self.conv6aa(c25), 0.1) - c26 = fluid.layers.leaky_relu(self.conv6a(c26), 0.1) - c26 = fluid.layers.leaky_relu(self.conv6b(c26), 0.1) - - corr6 = self.corr(c16, c26) - corr6 = fluid.layers.leaky_relu(corr6, alpha=0.1) - - x = 
fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv6_0(corr6), 0.1), corr6], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv6_1(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv6_2(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv6_3(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv6_4(x), 0.1), x], axis=1) - - flow6 = self.predict_flow6(x) - up_flow6 = self.deconv6(flow6) - up_feat6 = self.upfeat6(x) - - warp5 = self.warp(c25, up_flow6 * 0.625) - corr5 = self.corr(c15, warp5) - corr5 = fluid.layers.leaky_relu(corr5, alpha=0.1) - - x = fluid.layers.concat(input=[corr5, c15, up_flow6, up_feat6], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv5_0(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv5_1(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv5_2(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv5_3(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv5_4(x), 0.1), x], axis=1) - - flow5 = self.predict_flow5(x) - up_flow5 = self.deconv5(flow5) - up_feat5 = self.upfeat5(x) - - warp4 = self.warp(c24, up_flow5 * 1.25) - corr4 = self.corr(c14, warp4) - corr4 = fluid.layers.leaky_relu(corr4, alpha=0.1) - - x = fluid.layers.concat(input=[corr4, c14, up_flow5, up_feat5], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv4_0(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv4_1(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv4_2(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv4_3(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv4_4(x), 0.1), x], axis=1) - - flow4 = 
self.predict_flow4(x) - up_flow4 = self.deconv4(flow4) - up_feat4 = self.upfeat4(x) - - warp3 = self.warp(c23, up_flow4 * 2.5) - corr3 = self.corr(c13, warp3) - corr3 = fluid.layers.leaky_relu(corr3, alpha=0.1) - - x = fluid.layers.concat(input=[corr3, c13, up_flow4, up_feat4], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv3_0(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv3_1(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv3_2(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv3_3(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv3_4(x), 0.1), x], axis=1) - - flow3 = self.predict_flow3(x) - up_flow3 = self.deconv3(flow3) - up_feat3 = self.upfeat3(x) - - warp2 = self.warp(c22, up_flow3 * 5.0) - corr2 = self.corr(c12, warp2) - corr2 = fluid.layers.leaky_relu(corr2, alpha=0.1) - - x = fluid.layers.concat(input=[corr2, c12, up_flow3, up_feat3], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv2_0(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv2_1(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv2_2(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv2_3(x), 0.1), x], axis=1) - x = fluid.layers.concat(input=[fluid.layers.leaky_relu(self.conv2_4(x), 0.1), x], axis=1) - - flow2 = self.predict_flow2(x) - - x = fluid.layers.leaky_relu(self.dc_conv4(fluid.layers.leaky_relu( - self.dc_conv3(fluid.layers.leaky_relu(self.dc_conv2(fluid.layers.leaky_relu(self.dc_conv1(x), 0.1)), 0.1)), - 0.1)), 0.1) - flow2 += self.dc_conv7( - fluid.layers.leaky_relu(self.dc_conv6(fluid.layers.leaky_relu(self.dc_conv5(x), 0.1)), 0.1)) - if not output_more: - return flow2 - else: - return [flow2, flow3, flow4, flow5, flow6] - diff --git a/PaddleCV/Research/PWCNet/my_args.py 
b/PaddleCV/Research/PWCNet/my_args.py deleted file mode 100644 index bb673efe10534ba319fa240c09f05d044be76d4b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/my_args.py +++ /dev/null @@ -1,17 +0,0 @@ -import argparse - -parser = argparse.ArgumentParser(description='PWCNet_paddle') -parser.add_argument('--dataset', default='FlyingChairs', help='dataset type : FlyingChairs') -parser.add_argument('--data_root', default='', help='the path of selected datasets') -parser.add_argument('--model_out_dir', default='./out', help='the path of selected datasets') -parser.add_argument('--loss', default='l2', help='loss type : first train with l2 and finetune with l1') -parser.add_argument('--train_val_txt', default='', help='the path of selected train_val_txt of dataset') -parser.add_argument('--numEpoch', '-e', type=int, default=100, help='Number of epochs to train') -parser.add_argument('--batch_size', '-b', type=int, default=40, help='batch size') -parser.add_argument('--pretrained', default=None, help='path to the pretrained model weights') -parser.add_argument('--optimize', default=None, help='path to the pretrained optimize weights') -parser.add_argument('--use_multi_gpu',action = 'store_true', help='Enable multi gpu mode') - -args = parser.parse_args() -args.inference_size = [384, 512] -args.crop_size = [384, 448] \ No newline at end of file diff --git a/PaddleCV/Research/PWCNet/paddle_model/pwc_net_chairs_paddle.pdparams b/PaddleCV/Research/PWCNet/paddle_model/pwc_net_chairs_paddle.pdparams deleted file mode 100755 index 1b8a626b6bd1c5d30e65154bc6bb54f336716b25..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/paddle_model/pwc_net_chairs_paddle.pdparams and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/paddle_model/pwc_net_paddle.pdparams b/PaddleCV/Research/PWCNet/paddle_model/pwc_net_paddle.pdparams deleted file mode 100755 index 
6e947b41ca33f8871bb72d3ad1e8f0b709c8f354..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/paddle_model/pwc_net_paddle.pdparams and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/src/__init__.py b/PaddleCV/Research/PWCNet/src/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/PWCNet/src/flow_vis.py b/PaddleCV/Research/PWCNet/src/flow_vis.py deleted file mode 100644 index d2fe36828f829151ec307f1b4e1dc687b4ecc8b3..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/src/flow_vis.py +++ /dev/null @@ -1,163 +0,0 @@ -# MIT License -# -# Copyright (c) 2018 Tom Runia -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to conditions. -# -# Author: Tom Runia -# Date Created: 2018-08-03 - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np - - -def make_colorwheel(): - ''' - Generates a color wheel for optical flow visualization as presented in: - Baker et al. 
"A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007) - URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf - - According to the C++ source code of Daniel Scharstein - According to the Matlab source code of Deqing Sun - ''' - - RY = 15 - YG = 6 - GC = 4 - CB = 11 - BM = 13 - MR = 6 - - ncols = RY + YG + GC + CB + BM + MR - colorwheel = np.zeros((ncols, 3)) - col = 0 - - # RY - colorwheel[0:RY, 0] = 255 - colorwheel[0:RY, 1] = np.floor(255*np.arange(0,RY)/RY) - col = col+RY - # YG - colorwheel[col:col+YG, 0] = 255 - np.floor(255*np.arange(0,YG)/YG) - colorwheel[col:col+YG, 1] = 255 - col = col+YG - # GC - colorwheel[col:col+GC, 1] = 255 - colorwheel[col:col+GC, 2] = np.floor(255*np.arange(0,GC)/GC) - col = col+GC - # CB - colorwheel[col:col+CB, 1] = 255 - np.floor(255*np.arange(CB)/CB) - colorwheel[col:col+CB, 2] = 255 - col = col+CB - # BM - colorwheel[col:col+BM, 2] = 255 - colorwheel[col:col+BM, 0] = np.floor(255*np.arange(0,BM)/BM) - col = col+BM - # MR - colorwheel[col:col+MR, 2] = 255 - np.floor(255*np.arange(MR)/MR) - colorwheel[col:col+MR, 0] = 255 - return colorwheel - - -def flow_compute_color(u, v, convert_to_bgr=False): - ''' - Applies the flow color wheel to (possibly clipped) flow components u and v. 
- - According to the C++ source code of Daniel Scharstein - According to the Matlab source code of Deqing Sun - - :param u: np.ndarray, input horizontal flow - :param v: np.ndarray, input vertical flow - :param convert_to_bgr: bool, whether to change ordering and output BGR instead of RGB - :return: - ''' - - flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8) - - colorwheel = make_colorwheel() # shape [55x3] - ncols = colorwheel.shape[0] - - rad = np.sqrt(np.square(u) + np.square(v)) - a = np.arctan2(-v, -u)/np.pi - - fk = (a+1) / 2*(ncols-1) - k0 = np.floor(fk).astype(np.int32) - k1 = k0 + 1 - k1[k1 == ncols] = 0 - f = fk - k0 - - for i in range(colorwheel.shape[1]): - - tmp = colorwheel[:,i] - col0 = tmp[k0] / 255.0 - col1 = tmp[k1] / 255.0 - col = (1-f)*col0 + f*col1 - - idx = (rad <= 1) - col[idx] = 1 - rad[idx] * (1-col[idx]) - col[~idx] = col[~idx] * 0.75 # out of range? - - # Note the 2-i => BGR instead of RGB - ch_idx = 2-i if convert_to_bgr else i - flow_image[:,:,ch_idx] = np.floor(255 * col) - - return flow_image - - -def flow_to_color(flow_uv, clip_flow=None, convert_to_bgr=False): - ''' - Expects a two dimensional flow image of shape [H,W,2] - - According to the C++ source code of Daniel Scharstein - According to the Matlab source code of Deqing Sun - - :param flow_uv: np.ndarray of shape [H,W,2] - :param clip_flow: float, maximum clipping value for flow - :return: - ''' - assert flow_uv.ndim == 3, 'input flow must have three dimensions' - assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]' - - if clip_flow is not None: - flow_uv = np.clip(flow_uv, 0, clip_flow) - - u = flow_uv[:,:,0] - v = flow_uv[:,:,1] - - rad = np.sqrt(np.square(u) + np.square(v)) - rad_max = np.max(rad) - - epsilon = 1e-5 - u = u / (rad_max + epsilon) - v = v / (rad_max + epsilon) - return flow_compute_color(u, v, convert_to_bgr) - - -def read_flow(filename): - """ - https://github.com/sampepose/flownet2-tf/blob/master/src/flowlib.py - read optical flow 
from Middlebury .flo file - :param filename: name of the flow file - :return: optical flow data in matrix - """ - f = open(filename, 'rb') - magic = np.fromfile(f, np.float32, count=1) - data2d = None - - if 202021.25 != magic: - print('Magic number incorrect. Invalid .flo file') - else: - w = np.fromfile(f, np.int32, count=1) - h = np.fromfile(f, np.int32, count=1) - print("Reading %d x %d flo file" % (h, w)) - data2d = np.fromfile(f, np.float32, count=2 * w[0] * h[0]) - # reshape data into 3D array (columns, rows, channels) - data2d = np.resize(data2d, (h[0], w[0], 2)) - f.close() - return data2d diff --git a/PaddleCV/Research/PWCNet/src/multiscaleloss.py b/PaddleCV/Research/PWCNet/src/multiscaleloss.py deleted file mode 100644 index a52a74acf278fde4a99335af21459050fd28a7ef..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/src/multiscaleloss.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle -import paddle.fluid as fluid - - -def EPE(input_flow, target_flow, loss_type, sparse=False, mean=True): - if loss_type == 'l1': - EPE_map = fluid.layers.abs(input_flow - target_flow) - else: - EPE_map = fluid.layers.square(input_flow - target_flow) - if sparse: #TODO mask = (target_flow[:,0] == 0) & (target_flow[:,1] == 0) EPE_map = EPE_map[~mask] - mask_temp1 = fluid.layers.cast(target_flow[:, 0] == 0, 'float32') - mask_temp2 = fluid.layers.cast(target_flow[:, 1] == 0, 'float32') - mask = 1 - fluid.layers.elementwise_mul(mask_temp1, mask_temp2) - mask = fluid.layers.reshape(mask, [mask.shape[0], 1, mask.shape[1], mask.shape[2]]) - mask = fluid.layers.concat([mask, mask], 1) - EPE_map = EPE_map * mask - - if mean: - return fluid.layers.mean(EPE_map) - else: - batch_size = EPE_map.shape[0] - res_sum = fluid.layers.reduce_sum(EPE_map) - res = res_sum / batch_size - return res - - -def sparse_max_pool(input, size): - '''Downsample the input by considering 0 values as invalid. - - Unfortunately, no generic interpolation mode can resize a sparse map correctly, - the strategy here is to use max pooling for positive values and "min pooling" - for negative values, the two results are then summed. 
- This technique allows sparsity to be minized, contrary to nearest interpolation, - which could potentially lose information for isolated data points.''' - - positive = fluid.layers.cast(input > 0, 'float32') - negative = fluid.layers.cast(input < 0, 'float32') - output = fluid.layers.adaptive_pool2d(input * positive, size) - fluid.layers.adaptive_pool2d(-input * negative, - size) - return output - - -def multiscaleEPE(network_output, target_flow, loss_type, weights=None, sparse=False): - def one_scale(output, target, sparse, loss_type): - if sparse: - h = output.shape[2] - w = output.shape[3] - target_scaled = sparse_max_pool(target, [h, w]) - else: - target_scaled = fluid.layers.resize_bilinear(target, out_shape=[output.shape[2], - output.shape[3]], - align_corners=False, align_mode=False) - return EPE(output, target_scaled, loss_type=loss_type, sparse=sparse, mean=False) - - if type(network_output) not in [tuple, list]: - network_output = [network_output] - if weights is None: - weights = [0.005, 0.01, 0.02, 0.08, 0.32] # as in original article - assert(len(weights) == len(network_output)) - - loss = 0 - for output, weight in zip(network_output, weights): - loss += weight * one_scale(output, target_flow, sparse, loss_type) - return loss - - -def realEPE(output, target, sparse=False): - upsampled_output = fluid.layers.resize_bilinear(output, out_shape=[target.shape[2], - target.shape[3]], - align_corners=False, align_mode=False) - return EPE(upsampled_output, target, sparse, mean=True) - diff --git a/PaddleCV/Research/PWCNet/src/read_files.py b/PaddleCV/Research/PWCNet/src/read_files.py deleted file mode 100644 index 743a57ddc2552c668c5a76b3511659c861ab160f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/src/read_files.py +++ /dev/null @@ -1,22 +0,0 @@ -def read_txt(videoTxt): - with open(videoTxt, 'r') as f: - videolist = f.readlines() - return videolist - - -def read_txt_to_index(file): - data = read_txt(file) - data = list(map(int, 
data)) - return data - - -def main(): - file = 'data_dir/FlyingChairs_release/FlyingChairs_train_val.txt' - data = read_txt_to_index(file) - data = list(map(int, data)) - print(data) - print(len(data)) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/Research/PWCNet/tmp/hsv_pd.png b/PaddleCV/Research/PWCNet/tmp/hsv_pd.png deleted file mode 100755 index 0ebc10300e6d3e93260ddbec59bc9d002958c01a..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/tmp/hsv_pd.png and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/tmp/hsv_pd_chairs.png b/PaddleCV/Research/PWCNet/tmp/hsv_pd_chairs.png deleted file mode 100755 index cc3249bf0ca991502f715f26e29a723b9319b8ce..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/PWCNet/tmp/hsv_pd_chairs.png and /dev/null differ diff --git a/PaddleCV/Research/PWCNet/train.py b/PaddleCV/Research/PWCNet/train.py deleted file mode 100644 index 7dc3b05edf1ccd2b59594e5c4a157e90b9390735..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/train.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Trainer for PWCNet.""" -import sys -import os -os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = "0.99999" -os.environ["FLAGS_eager_delete_tensor_gb"] = "0" -import pickle -import time -import cv2 -import numpy as np -import paddle -import paddle.fluid as fluid -from scipy.misc import imsave -from src import flow_vis -from models.model import PWCDCNet -from data.datasets import FlyingChairs, reader_flyingchairs -from src.multiscaleloss import multiscaleEPE, realEPE -from AverageMeter import * -from my_args import args - - -def writeFlowFile(filename, uv): - """ - According to the matlab code of Deqing Sun and c++ source code of Daniel Scharstein - Contact: dqsun@cs.brown.edu - Contact: schar@middlebury.edu - """ - TAG_STRING = np.array(202021.25, dtype=np.float32) - if uv.shape[2] != 2: - sys.exit("writeFlowFile: flow must have two bands!"); - H = np.array(uv.shape[0], dtype=np.int32) - W = np.array(uv.shape[1], dtype=np.int32) - with open(filename, 'wb') as f: - f.write(TAG_STRING.tobytes()) - f.write(W.tobytes()) - f.write(H.tobytes()) - f.write(uv.tobytes()) - - -def load_dict(filename_): - with open(filename_, 'rb') as f: - ret_di = pickle.load(f) - return ret_di - - -def pad_input(x0): - intWidth = x0.shape[2] - intHeight = x0.shape[3] - if intWidth != ((intWidth >> 6) << 6): - intWidth_pad = (((intWidth >> 6) + 1) << 6) # more than necessary - intPaddingLeft = int((intWidth_pad - intWidth) / 2) - intPaddingRight = intWidth_pad - intWidth - intPaddingLeft - else: - intWidth_pad = intWidth - intPaddingLeft = 0 - intPaddingRight = 0 - - if intHeight != ((intHeight >> 6) << 6): - intHeight_pad = (((intHeight >> 6) + 1) << 6) # more than necessary - intPaddingTop = int((intHeight_pad - intHeight) / 2) - intPaddingBottom = intHeight_pad - intHeight - intPaddingTop - else: - intHeight_pad = intHeight - intPaddingTop = 0 - intPaddingBottom = 0 - - out = fluid.layers.pad2d(input=x0, - paddings=[intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom], - 
mode='edge') - - return out, [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom, intWidth, intHeight] - - -def val(model, batch_reader, epoch, batch_num): - model.eval() - loss_cnt = AverageMeter() - for batch_id, data in enumerate(batch_reader()): - start = time.time() - im1_data = np.array( - [x[0] for x in data]).astype('float32') - im2_data = np.array( - [x[1] for x in data]).astype('float32') - flo_data = np.array( - [x[2] for x in data]).astype('float32') - step = im1_data.shape[0] - - im_all = np.concatenate((im1_data, im2_data), axis=3).astype(np.float32) - im_all = im_all / 255.0 - im_all = np.swapaxes(np.swapaxes(im_all, 1, 2), 1, 3) - label = flo_data / 20.0 - label = np.swapaxes(np.swapaxes(label, 1, 2), 1, 3) - - im_all = fluid.dygraph.to_variable(im_all) - label = fluid.dygraph.to_variable(label) - # im_all, [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom, intWidth, intHeight] = pad_input( - # im_all) - - end = time.time() - read_data_time = end - start - start = time.time() - network_output = model(im_all, output_more=False) - loss = realEPE(network_output, label) - end = time.time() - loss_cnt.update(loss.numpy()[0], step) - print('val epoch {} batch {}/{} run time: {}s read data time {}s loss {}'.format(epoch, batch_id, batch_num, - round(end - start, 2), - round(read_data_time, 2), - loss.numpy())) - return round(loss_cnt.avg, 4) - - -def train(model, train_batch_reader, adam, epoch, batch_num, args): - loss_type = args.loss - model.train() - for batch_id, data in enumerate(train_batch_reader()): - start = time.time() - im1_data = np.array( - [x[0] for x in data]).astype('float32') - im2_data = np.array( - [x[1] for x in data]).astype('float32') - flo_data = np.array( - [x[2] for x in data]).astype('float32') - im_all = np.concatenate((im1_data, im2_data), axis=3).astype(np.float32) - im_all = im_all / 255.0 - im_all = np.swapaxes(np.swapaxes(im_all, 1, 2), 1, 3) - label = flo_data / 20.0 - label = 
np.swapaxes(np.swapaxes(label, 1, 2), 1, 3) - if batch_id % 10 == 0: - im1 = im_all[0, :3, :, :] * 255 - im2 = im_all[0, 3:, :, :] * 255 - im1 = np.swapaxes(np.swapaxes(im1, 0, 1), 1, 2).astype(np.uint8) - im2 = np.swapaxes(np.swapaxes(im2, 0, 1), 1, 2).astype(np.uint8) - - flo = label[0, :, :, :] * 20 - flo = np.swapaxes(np.swapaxes(flo, 0, 1), 1, 2) - imsave('./img1.png', im1) - imsave('./img2.png', im2) - flow_color = flow_vis.flow_to_color(flo, convert_to_bgr=False) - imsave('./hsv_pd.png', flow_color) - H = im_all[0].shape[1] - W = im_all[0].shape[2] - - im_all = fluid.dygraph.to_variable(im_all) - label = fluid.dygraph.to_variable(label) - im_all, [intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom, intWidth, intHeight] = pad_input( - im_all) - - label, _ = pad_input(label) - end = time.time() - read_data_time = end - start - start = time.time() - network_output = model(im_all, output_more=True) - if batch_id % 10 == 0: - flo = network_output[0][0].numpy() * 20.0 - # scale the flow back to the input size - flo = np.swapaxes(np.swapaxes(flo, 0, 1), 1, 2) - flo = flo[intPaddingTop * 2:intPaddingTop * 2 + intHeight * 2, - intPaddingLeft * 2: intPaddingLeft * 2 + intWidth * 2, :] - - u_ = cv2.resize(flo[:, :, 0], (W, H)) - v_ = cv2.resize(flo[:, :, 1], (W, H)) - flo = np.dstack((u_, v_)) - flow_color = flow_vis.flow_to_color(flo, convert_to_bgr=False) - imsave('./hsv_predict.png', flow_color) - loss = multiscaleEPE(network_output, label, loss_type, weights=None, sparse=False) - - end = time.time() - loss.backward() - if args.use_multi_gpu: - model.apply_collective_grads() - adam.minimize(loss) - model.clear_gradients() - print('epoch {} batch {}/{} run time: {}s read data time {}s loss {}'.format(epoch, batch_id, batch_num, - round(end - start, 2), - round(read_data_time, 2), - loss.numpy())) - - -def main(): - print(args) - if args.use_multi_gpu: - place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) - else: - place = fluid.CUDAPlace(0) - - 
with fluid.dygraph.guard(place=place): - if args.use_multi_gpu: - strategy = fluid.dygraph.parallel.prepare_context() - model = PWCDCNet("pwcnet") - if args.pretrained: - print('-----------load pretrained model:', args.pretrained) - pd_pretrain, _ = fluid.dygraph.load_dygraph(args.pretrained) - model.set_dict(pd_pretrain) - - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.0001, regularization=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0004)) - if args.optimize: - print('--------------load pretrained model:', args.optimize) - adam_pretrain, _ = fluid.dygraph.load_dygraph(args.optimize) - adam.set_dict(adam_pretrain) - if args.use_multi_gpu: - model = fluid.dygraph.parallel.DataParallel(model, strategy) - - if args.dataset == 'FlyingChairs': - train_flyingchairs_dataset = FlyingChairs('train', args, is_cropped=True, txt_file=args.train_val_txt, - root=args.data_root) - val_flyingchairs_dataset = FlyingChairs('val', args, is_cropped=False, txt_file=args.train_val_txt, - root=args.data_root) - else: - raise ValueError('dataset name is wrong, please fix it by using args.dataset') - - train_sample_num = len(train_flyingchairs_dataset) - val_sample_num = len(val_flyingchairs_dataset) - print('train sample num: ', train_sample_num) - print('val sample num: ', val_sample_num) - train_reader = reader_flyingchairs(train_flyingchairs_dataset) - val_reader = reader_flyingchairs(val_flyingchairs_dataset) - if args.use_multi_gpu: - train_reader = fluid.contrib.reader.distributed_batch_reader( - train_reader) - val_reader = fluid.contrib.reader.distributed_batch_reader( - val_reader) - BATCH_SIZE = args.batch_size - train_batch_num = round(train_sample_num / BATCH_SIZE) - val_batch_num = round(val_sample_num / BATCH_SIZE) - train_batch_reader = paddle.batch(paddle.reader.shuffle(train_reader, buf_size=BATCH_SIZE * 100), BATCH_SIZE, - drop_last=True) - val_batch_reader = paddle.batch(val_reader, BATCH_SIZE, drop_last=False) - epoch_num = args.numEpoch - 
val_value = 100000000 - rm_best_model = "" - - for epoch in range(epoch_num): - train(model, train_batch_reader, adam, epoch, train_batch_num, args) - pd_save_dir = args.model_out_dir - if not os.path.exists(pd_save_dir): - os.makedirs(pd_save_dir) - pd_model_save = os.path.join(pd_save_dir, 'epoch_' + str(epoch) + "_pwc_net_paddle") - rm_dir = os.path.join(pd_save_dir, 'epoch_' + str(epoch - 1) + "_pwc_net_paddle.pdparams") - if os.path.exists(rm_dir): - os.remove(rm_dir) - if args.use_multi_gpu: - if fluid.dygraph.parallel.Env().local_rank == 0: - fluid.dygraph.save_dygraph(model.state_dict(), pd_model_save) - fluid.dygraph.save_dygraph(adam.state_dict(), os.path.join(pd_save_dir, 'adam')) - else: - fluid.dygraph.save_dygraph(model.state_dict(), pd_model_save) - fluid.dygraph.save_dygraph(adam.state_dict(), os.path.join(pd_save_dir, 'adam')) - val_loss_value = val(model, val_batch_reader, epoch, val_batch_num) - if val_loss_value < val_value: - best_model = os.path.join(pd_save_dir, "pwc_net_paddle_" + str(val_loss_value) + '.pdparams') - os.link(pd_model_save + '.pdparams', best_model) - if os.path.exists(rm_best_model): - os.remove(rm_best_model) - rm_best_model = best_model - val_value = val_loss_value - - -if __name__ == '__main__': - main() - - - diff --git a/PaddleCV/Research/PWCNet/train.sh b/PaddleCV/Research/PWCNet/train.sh deleted file mode 100755 index 7c2b7226bef96ebdbbe6c768255a8419e0d32de0..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/PWCNet/train.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -python3 train.py --dataset FlyingChairs --train_val_txt data_dir/FlyingChairs_release/FlyingChairs_train_val.txt --data_root data_dir/FlyingChairs_release/data -# use multi gpus NEED TO DO LATER -#python3 -m paddle.distributed.launch --selected_gpus=0,1 --log_dir ./mylog train.py --use_multi_gpu --batch_size 20 --dataset FlyingChairs --train_val_txt data_dir/FlyingChairs_release/FlyingChairs_train_val.txt --data_root 
data_dir/FlyingChairs_release/data diff --git a/PaddleCV/Research/SemSegPaddle/README.md b/PaddleCV/Research/SemSegPaddle/README.md deleted file mode 100644 index f4fd9a731b947c95accbaff6686e8d272debf076..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/README.md +++ /dev/null @@ -1,139 +0,0 @@ -# SemSegPaddle: A Paddle-based Framework for Deep Learning in Semantic Segmentation - -This is a Paddle implementation of semantic segmentation models on multiple datasets, including Cityscapes, Pascal Context, and ADE20K. - -## Updates - -- [**2020/01/08**] We release ***PSPNet-ResNet101*** and ***GloRe-ResNet101*** models on Pascal Context and Cityscapes datasets. - -## Highlights - -Synchronized Batch Normlization is important for segmenation. - - The implementation is easy to use as it is pure-python, no any C++ extra extension libs. - - - Paddle provides sync_batch_norm. - - -## Support models - -We split our models into backbone and decoder network, where backbone network are transfered from classification networks. - -Backbone: - - ResNet - - ResNeXt - - HRNet - - EfficientNet - -Decoder: - - PSPNet: [Pyramid Scene Parsing Network](http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) - - DeepLabv3: [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) - - GloRe: [Graph-Based Global Reasoning Networks](http://openaccess.thecvf.com/content_CVPR_2019/papers/Chen_Graph-Based_Global_Reasoning_Networks_CVPR_2019_paper.pdf) - - GINet: [GINet: Graph Interaction Netowrk for Scene Parsing]() - - - -## Peformance - - - Performance of Cityscapes validation set. 
- -**Method** | **Backbone** | **lr** | **BatchSize** | **epoch** | **mean IoU (Single-scale)** | **Trained weights** | -------------|:------------:|:----------:|:--------------:|:------------:|:---------------------------:|------------------------| -PSPNet | resnet101 | 0.01 | 8 | 80 | 78.1 | [pspnet_resnet_cityscapes_epoch_80.pdparams](https://pan.baidu.com/s/1adfvtq2JnLKRv_j7lOmW1A)| -GloRe | resnet101 | 0.01 | 8 | 80 | 78.4 | [pspnet_resnet_pascalcontext_epoch_80.pdparams](https://pan.baidu.com/s/1r4SbrYKbVk38c0dXZLAi9w) | - - - - Performance of Pascal-context validation set. - -**Method** | **Backbone** | **lr** | **BatchSize** | **epoch** | **mean IoU (Single-scale)** | **Trained weights** | -------------|:------------:|:----------:|:--------------:|:------------:|:---------------------------:|:----------------------:| -PSPNet | resnet101 | 0.005 | 16 | 80 | 48.9 | [glore_resnet_cityscapes_epoch_80.pdparams](https://pan.baidu.com/s/1l7-sqt2DsUunD9l4YivgQw) | -GloRe | resnet101 | 0.005 | 16 | 80 | 48.4 | [glore_resnet_pascalcontext_epoch_80.pdparams](https://pan.baidu.com/s/1rVuk7OfSj-AXR3ZCFGNmKg) | - - -## Environment - -This repo is developed under the following configurations: - - - Hardware: 4 GPUs for training, 1 GPU for testing - - Software: Centos 6.10, ***CUDA>=9.2 Python>=3.6, Paddle>=1.6*** - - -## Quick start: training and testing models - -### 1. Preparing data - -Download the [Cityscapes](https://www.cityscapes-dataset.com/) dataset. It should have this basic structure: - - cityscapes/ - ├── cityscapes_list - │ ├── test.lst - │ ├── train.lst - │ ├── train+.lst - │ ├── train++.lst - │ ├── trainval.lst - │ └── val.lst - ├── gtFine - │ ├── test - │ ├── train - │ └── val - ├── leftImg8bit - │ ├── test - │ ├── train - │ └── val - ├── license.txt - └── README - - Download Pascal-Context dataset. 
It should have this basic structure: - - pascalContext/ - ├── GroundTruth_trainval_mat - ├── GroundTruth_trainval_png - ├── JPEGImages - ├── pascal_context_train.txt - ├── pascal_context_val.txt - ├── README.md - └── VOCdevkit - - Then, create symlinks for the Cityscapes and Pascal-Context datasets - ``` - cd SemSegPaddle/data - ln -s $cityscapes ./ - ln -s $pascalContext ./ - ``` - -### 2. Download pretrained weights - Downlaod pretrained [resnet-101](https://pan.baidu.com/s/1niXBDZnLlUIulB7FY068DQ) weights file, and put it into the directory: ***./pretrained_model*** - - Then, run the following command: -``` - tar -zxvf ./repretrained/resnet101_v2.tgz -C pretrained_model -``` - -### 3. Training - -select confiure file for training according to the DECODER\_NAME, BACKBONE\_NAME and DATASET\_NAME. -``` -CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch train.py --use_gpu --use_mpio \ - --cfg ./configs/pspnet_res101_cityscapes.yaml -``` - -### 4. Testing -select confiure file for testing according to the DECODER\_NAME, BACKBONE\_NAME and DATASET\_NAME. - -Single-scale testing: -``` -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --use_mpio \ - --cfg ./configs/pspnet_res101_cityscapes.yaml -``` - -Multi-scale testing: -``` -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --use_mpio \ - --multi_scales \ - --cfg ./configs/pspnet_res101_cityscapes.yaml -``` - -## Contact -If you have any questions regarding the repo, please create an issue. 
diff --git a/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_cityscapes.yaml b/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_cityscapes.yaml deleted file mode 100644 index 093758b507bf5a8bc963ca20d3e4fff56adf7fdd..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_cityscapes.yaml +++ /dev/null @@ -1,46 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.75 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 1024 - CROP_SIZE: 769 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 2 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "cityscapes" - DATA_DIR: "./data/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./data/cityscapes/cityscapes_list/test.lst" - TRAIN_FILE_LIST: "./data/cityscapes/cityscapes_list/train.lst" - VAL_FILE_LIST: "./data/cityscapes/cityscapes_list/val.lst" - IGNORE_INDEX: 255 - DATA_DIM: 3 -MODEL: - MODEL_NAME: "deeplabv3" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0,0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - DEEPLABv3: - DEPTH_MULTIPLIER: 1 - ASPP_WITH_SEP_CONV: True - AuxHead: True -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - MODEL_SAVE_DIR: "./snapshots/deeplabv3_resnet_cityscapes/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "./snapshots/deeplabv3_resnet_cityscapes" - BASE_SIZE: 2048 - CROP_SIZE: 769 - SLIDE_WINDOW: True -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" - diff --git a/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_pascalcontext.yaml b/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_pascalcontext.yaml deleted file mode 100644 index fa41bfb02844293390df3ce6c2e271cb5a2e80ee..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/deeplabv3_res101_pascalcontext.yaml +++ /dev/null @@ -1,47 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 520 - CROP_SIZE: 520 - 
EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 4 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "pascalContext" - DATA_DIR: "./data/pascalContext/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 59 - TEST_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - TRAIN_FILE_LIST: "./data/pascalContext/pascal_context_train.txt" - VAL_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - IGNORE_INDEX: -1 - DATA_DIM: 3 - SEPARATOR: ' ' -MODEL: - MODEL_NAME: "deeplabv3" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0,0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - DEEPLABv3: - DEPTH_MULTIPLIER: 1 - ASPP_WITH_SEP_CONV: True - AuxHead: True -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - MODEL_SAVE_DIR: "./snapshots/deeplabv3_resnet_pascalcontext/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "./snapshots/deeplabv3_resnet_pascalcontext" - BASE_SIZE: 520 - CROP_SIZE: 520 - SLIDE_WINDOW: True -SOLVER: - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" - diff --git a/PaddleCV/Research/SemSegPaddle/configs/glore_res101_cityscapes.yaml b/PaddleCV/Research/SemSegPaddle/configs/glore_res101_cityscapes.yaml deleted file mode 100644 index fa26415584f1f6391b2562981bbfdbaa06d02354..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/glore_res101_cityscapes.yaml +++ /dev/null @@ -1,45 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 1024 - CROP_SIZE: 769 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 2 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "cityscapes" - DATA_DIR: "./data/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./data/cityscapes/cityscapes_list/test.lst" - TRAIN_FILE_LIST: "./data/cityscapes/cityscapes_list/train.lst" - VAL_FILE_LIST: "./data/cityscapes/cityscapes_list/val.lst" - IGNORE_INDEX: 255 - DATA_DIM: 3 -MODEL: - MODEL_NAME: 
"glore" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0, 0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - GLORE: - DEPTH_MULTIPLIER: 1 - AuxHead: True -TRAIN: - MODEL_SAVE_DIR: "snapshots/glore_res101_cityscapes/" - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "snapshots/glore_res101_cityscapes" - BASE_SIZE: 2048 - CROP_SIZE: 769 - SLIDE_WINDOW: True -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" - diff --git a/PaddleCV/Research/SemSegPaddle/configs/glore_res101_pascalcontext.yaml b/PaddleCV/Research/SemSegPaddle/configs/glore_res101_pascalcontext.yaml deleted file mode 100644 index 9dd26b95aba4d14a33af2751182c9c2ff485416a..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/glore_res101_pascalcontext.yaml +++ /dev/null @@ -1,45 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 520 - CROP_SIZE: 520 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 4 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "pascalContext" - DATA_DIR: "./data/pascalContext/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 59 - TEST_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - TRAIN_FILE_LIST: "./data/pascalContext/pascal_context_train.txt" - VAL_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - IGNORE_INDEX: -1 - DATA_DIM: 3 - SEPARATOR: ' ' -MODEL: - MODEL_NAME: "glore" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0,0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - GLORE: - DEPTH_MULTIPLIER: 1 - AuxHead: True -TEST: - TEST_MODEL: "snapshots/glore_res101_pascalContext" - BASE_SIZE: 520 - CROP_SIZE: 520 - SLIDE_WINDOW: True -TRAIN: - MODEL_SAVE_DIR: "snapshots/glore_res101_pascalContext/" - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - SNAPSHOT_EPOCH: 1 -SOLVER: - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" 
- NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" diff --git a/PaddleCV/Research/SemSegPaddle/configs/pspnet_hrnet_cityscapes.yaml b/PaddleCV/Research/SemSegPaddle/configs/pspnet_hrnet_cityscapes.yaml deleted file mode 100644 index 8aa5a38c869811fac76cc3e0e994d3140ea0b012..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/pspnet_hrnet_cityscapes.yaml +++ /dev/null @@ -1,43 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.75 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 2048 - CROP_SIZE: 769 - SLIDE_WINDOW: True -TRAIN_BATCH_SIZE_PER_GPU: 2 -EVAL_BATCH_SIZE: 1 -NUM_TRAINERS: 4 -DATASET: - DATASET_NAME: "cityscapes" - DATA_DIR: "./data/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./data/cityscapes/cityscapes_list/test.lst" - TRAIN_FILE_LIST: "./data/cityscapes/cityscapes_list/train.lst" - VAL_FILE_LIST: "./data/cityscapes/cityscapes_list/val.lst" - IGNORE_INDEX: 255 - DATA_DIM: 3 -MODEL: - MODEL_NAME: "pspnet" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0,] - BACKBONE: "hrnet" - PSPNET: - DEPTH_MULTIPLIER: 1 - AuxHead: False -TRAIN: - MODEL_SAVE_DIR: "snapshots/pspnet_hrnet_cityscapes/" - PRETRAINED_MODEL_DIR: "./pretrained_model/HRNet_W40_C_pretrained/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "snapshots/pspnet_hrnet_cityscapes" - BASE_SIZE: 2048 - CROP_SIZE: 769 - SLIDE_WINDOW: True -SOLVER: - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 240 - LOSS: "['softmax_loss']" - diff --git a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_ade.yaml b/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_ade.yaml deleted file mode 100644 index 02423c5bbd99b5bc5188b9299088fb5b05a7294f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_ade.yaml +++ /dev/null @@ -1,44 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 520 - CROP_SIZE: 520 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 2 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 
-DATASET: - DATASET_NAME: "ade" - DATA_DIR: "./data/ade/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 150 - TEST_FILE_LIST: "./data/ade/ade_val.lst" - TRAIN_FILE_LIST: "./data/ade/ade_train.lst" - VAL_FILE_LIST: "./data/ade/ade_val.lst" - IGNORE_INDEX: -1 - DATA_DIM: 3 -MODEL: - MODEL_NAME: "pspnet" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0, 0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - PSPNET: - DEPTH_MULTIPLIER: 1 - AuxHead: True -TEST: - TEST_MODEL: "snapshots/pspnet_res101_ade/" - BASE_SIZE: 520 - CROP_SIZE: 520 - SLIDE_WINDOW: True -TRAIN: - MODEL_SAVE_DIR: "snapshots/pspnet_res101_ade/" - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - SNAPSHOT_EPOCH: 10 -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 120 - LOSS: "['softmax_loss']" diff --git a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_cityscapes.yaml b/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_cityscapes.yaml deleted file mode 100644 index a759677e92398993a8f052de6f20bd5fc65e8984..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_cityscapes.yaml +++ /dev/null @@ -1,45 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 1024 - CROP_SIZE: 769 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 2 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "cityscapes" - DATA_DIR: "./data/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./data/cityscapes/cityscapes_list/test.lst" - TRAIN_FILE_LIST: "./data/cityscapes/cityscapes_list/train.lst" - VAL_FILE_LIST: "./data/cityscapes/cityscapes_list/val.lst" - IGNORE_INDEX: 255 - DATA_DIM: 3 -MODEL: - MODEL_NAME: "pspnet" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0, 0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - PSPNET: - DEPTH_MULTIPLIER: 1 - AuxHead: True -TRAIN: - MODEL_SAVE_DIR: 
"snapshots/pspnet_res101_cityscapes/" - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "snapshots/pspnet_res101_cityscapes" - BASE_SIZE: 2048 - CROP_SIZE: 769 - SLIDE_WINDOW: True -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" - diff --git a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_pascalcontext.yaml b/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_pascalcontext.yaml deleted file mode 100644 index 111a6768b78ef0459a5e15b9c40526f9499915f9..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/configs/pspnet_res101_pascalcontext.yaml +++ /dev/null @@ -1,45 +0,0 @@ -DATAAUG: - RAND_SCALE_MIN: 0.5 - RAND_SCALE_MAX: 2.0 - BASE_SIZE: 520 - CROP_SIZE: 520 - EXTRA: True -TRAIN_BATCH_SIZE_PER_GPU: 4 -NUM_TRAINERS: 4 -EVAL_BATCH_SIZE: 1 -DATASET: - DATASET_NAME: "pascalContext" - DATA_DIR: "./data/pascalContext/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 59 - TEST_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - TRAIN_FILE_LIST: "./data/pascalContext/pascal_context_train.txt" - VAL_FILE_LIST: "./data/pascalContext/pascal_context_val.txt" - IGNORE_INDEX: -1 - DATA_DIM: 3 - SEPARATOR: ' ' -MODEL: - MODEL_NAME: "pspnet" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: [1.0,0.4] - BACKBONE: "resnet" - BACKBONE_LAYERS: 101 - BACKBONE_MULTI_GRID: True - PSPNET: - DEPTH_MULTIPLIER: 1 - AuxHead: True -TEST: - TEST_MODEL: "snapshots/pspnet_res101_pascalContext" - BASE_SIZE: 520 - CROP_SIZE: 520 - SLIDE_WINDOW: True -TRAIN: - MODEL_SAVE_DIR: "snapshots/pspnet_res101_pascalContext/" - PRETRAINED_MODEL_DIR: "./pretrained_model/resnet101_v2/" - SNAPSHOT_EPOCH: 1 -SOLVER: - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 80 - LOSS: "['softmax_loss']" diff --git a/PaddleCV/Research/SemSegPaddle/data/note.txt b/PaddleCV/Research/SemSegPaddle/data/note.txt deleted file mode 100644 index 
08a033d979d1aee4253a7b48b6aff911616f38f4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/data/note.txt +++ /dev/null @@ -1 +0,0 @@ -please create symlinks for datasets diff --git a/PaddleCV/Research/SemSegPaddle/eval.py b/PaddleCV/Research/SemSegPaddle/eval.py deleted file mode 100644 index 4195be40e939ecf4bc67adf2c393fde86a01f15c..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/eval.py +++ /dev/null @@ -1,311 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" -import sys -import time -import argparse -import functools -import pprint -import cv2 -import numpy as np -import paddle -import paddle.fluid as fluid -import math - -from src.utils.config import cfg -from src.utils.timer import Timer, calculate_eta -from src.models.model_builder import build_model -from src.models.model_builder import ModelPhase -from src.datasets import build_dataset -from src.utils.metrics import ConfusionMatrix - - -def parse_args(): - parser = argparse.ArgumentParser(description='SemsegPaddle') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess IO or not', - action='store_true', - default=False) - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - parser.add_argument( - '--multi_scales', - dest='multi_scales', - help='Use multi_scales for eval', - action='store_true', - default=False) - parser.add_argument( - '--flip', - dest='flip', - help='flip the image or not', - action='store_true', 
- default=False) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, multi_scales=False, flip=False, **kwargs): - np.set_printoptions(precision=5, suppress=True) - - num_classes = cfg.DATASET.NUM_CLASSES - base_size = cfg.TEST.BASE_SIZE - crop_size = cfg.TEST.CROP_SIZE - startup_prog = fluid.Program() - test_prog = fluid.Program() - dataset = build_dataset(cfg.DATASET.DATASET_NAME, - file_list=cfg.DATASET.VAL_FILE_LIST, - mode=ModelPhase.EVAL, - data_dir=cfg.DATASET.DATA_DIR) - - def data_generator(): - #TODO: check is batch reader compatitable with Windows - if use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - else: - data_gen = dataset.generator() - - for b in data_gen: - yield b[0], b[1], b[2] - - py_reader, avg_loss, out, grts, masks = build_model( - test_prog, startup_prog, phase=ModelPhase.EVAL) - - py_reader.decorate_sample_generator( - data_generator, drop_last=False, batch_size=cfg.EVAL_BATCH_SIZE, places=fluid.cuda_places()) - - # Get device environment - places = fluid.cuda_places() if use_gpu else fluid.cpu_places() - place = places[0] - dev_count = len(places) - print("#Device count: {}".format(dev_count)) - - exe = fluid.Executor(place) - exe.run(startup_prog) - - test_prog = test_prog.clone(for_test=True) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - if ckpt_dir is not None: - filename= '{}_{}_{}_epoch_{}.pdparams'.format(str(cfg.MODEL.MODEL_NAME), - str(cfg.MODEL.BACKBONE), str(cfg.DATASET.DATASET_NAME), cfg.SOLVER.NUM_EPOCHS) - print("loading testing model file: {}/{}".format(ckpt_dir, filename)) - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog, filename=filename) - - # Use streaming confusion matrix to calculate mean_iou - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - conf_mat 
= ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) - - #fetch_list: return of the model - fetch_list = [avg_loss.name, out.name] - num_images = 0 - step = 0 - all_step = cfg.DATASET.VAL_TOTAL_IMAGES // cfg.EVAL_BATCH_SIZE - timer = Timer() - timer.start() - for data in py_reader(): - mask = np.array(data[0]['mask']) - label = np.array(data[0]['label']) - image_org = np.array(data[0]['image']) - image = np.transpose(image_org, (0, 2, 3, 1)) # BCHW->BHWC - image = np.squeeze(image) - - if cfg.TEST.SLIDE_WINDOW: - if not multi_scales: - scales = [1.0] - else: - scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if cfg.DATASET.DATASET_NAME == 'cityscapes' else [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] - #scales = [0.75, 1.0, 1.25] # fast multi-scale testing - - #strides - stride = int(crop_size *1.0 / 3) # 1/3 > 2/3 > 1/2 for input_size: 769 x 769 - h, w = image.shape[0:2] - scores = np.zeros(shape=[num_classes, h, w], dtype='float32') - - for scale in scales: - long_size = int(math.ceil(base_size * scale)) - if h > w: - height = long_size - width = int(1.0 * w * long_size / h + 0.5) - short_size = width - else: - width = long_size - height = int(1.0 * h * long_size / w + 0.5) - short_size = height - # print('org_img_size: {}x{}, rescale_img_size: {}x{}'.format(h, w, height, width)) - cur_img = image_resize(image, height, width) - # pading - if long_size <= crop_size: - pad_img = pad_single_image(cur_img, crop_size) - label_feed, mask_feed = get_feed(pad_img) - pad_img = mapper_image(pad_img) - loss, pred1 = exe.run( - test_prog, - feed={'image':pad_img, 'label':label_feed, 'mask':mask_feed}, - fetch_list = fetch_list, - return_numpy=True) - pred1 = np.array(pred1) - outputs = pred1[:, :, :height, :width] - if flip: - pad_img_flip = flip_left_right_image(cur_img) - pad_img_flip = pad_single_image(pad_img_flip, crop_size) - label_feed, mask_feed = get_feed(pad_img_flip) - - pad_img_flip = mapper_image(pad_img_flip) - loss, pred1 = exe.run( - test_prog, - 
feed={'image':pad_img_flip, 'label':label_feed, 'mask':mask_feed}, - fetch_list = fetch_list, - return_numpy=True) - pred1 = np.flip(pred1, 3) - outputs += pred1[:, :, :height, :width] - else: - if short_size < crop_size: - pad_img = pad_single_image(cur_img, crop_size) - else: - pad_img = cur_img - ph, pw = pad_img.shape[0:2] - - #slid window - h_grids = int(math.ceil(1.0 * (ph - crop_size) / stride)) + 1 - w_grids = int(math.ceil(1.0 * (pw - crop_size) / stride)) + 1 - outputs = np.zeros(shape=[1, num_classes, ph, pw], dtype='float32') - count_norm = np.zeros(shape=[1, 1, ph, pw], dtype='int32') - for idh in range(h_grids): - for idw in range(w_grids): - h0 = idh * stride - w0 = idw * stride - h1 = min(h0 + crop_size, ph) - w1 = min(w0 + crop_size, pw) - #print('(h0,w0,h1,w1):({},{},{},{})'.format(h0, w0, h1, w1)) - crop_img = crop_image(pad_img, h0, w0, h1, w1) - pad_crop_img = pad_single_image(crop_img, crop_size) - label_feed, mask_feed = get_feed(pad_crop_img) - pad_crop_img = mapper_image(pad_crop_img) - loss, pred1 = exe.run( - test_prog, - feed={'image':pad_crop_img, 'label':label_feed, 'mask':mask_feed}, - fetch_list = fetch_list, - return_numpy=True) - pred1 = np.array(pred1) - outputs[:, :, h0:h1, w0:w1] += pred1[:, :, 0:h1-h0, 0:w1-w0] - count_norm[:, :, h0:h1, w0:w1] += 1 - if flip: - pad_img_flip = flip_left_right_image(crop_img) - pad_img_flip = pad_single_image(pad_img_flip, crop_size) - label_feed, mask_feed = get_feed(pad_img_flip) - pad_img_flip = mapper_image(pad_img_flip) - loss, pred1 = exe.run( - test_prog, - feed={'image':pad_img_flip, 'label':label_feed, 'mask':mask_feed}, - fetch_list = fetch_list, - return_numpy = True) - pred1 = np.flip(pred1, 3) - outputs[:, :, h0:h1, w0:w1] += pred1[:, :, 0:h1-h0, 0:w1-w0] - count_norm[:, :, h0:h1, w0:w1] += 1 - - outputs = 1.0 * outputs / count_norm - outputs = outputs[:, :, :height, :width] - with fluid.dygraph.guard(): - outputs = fluid.dygraph.to_variable(outputs) - outputs = 
fluid.layers.resize_bilinear(outputs, out_shape=[h, w]) - score = outputs.numpy()[0] - scores += score - else: - # taking the original image as the model input - loss, pred = exe.run( - test_prog, - feed={'image':image_org, 'label':label, 'mask':mask}, - fetch_list = fetch_list, - return_numpy = True) - scores = pred[0] - # computing IoU with all scale result - pred = np.argmax(scores, axis=0).astype('int64') - pred = pred[np.newaxis, :, :, np.newaxis] - step += 1 - num_images += pred.shape[0] - conf_mat.calculate(pred, label, mask) - _, iou = conf_mat.mean_iou() - _, acc = conf_mat.accuracy() - - print("[EVAL] step={}/{} acc={:.4f} IoU={:.4f}".format(step, all_step, acc, iou)) - - category_iou, avg_iou = conf_mat.mean_iou() - category_acc, avg_acc = conf_mat.accuracy() - print("[EVAL] #image={} acc={:.4f} IoU={:.4f}".format(num_images, avg_acc, avg_iou)) - print("[EVAL] Category IoU:", category_iou) - print("[EVAL] Category Acc:", category_acc) - print("[EVAL] Kappa:{:.4f}".format(conf_mat.kappa())) - print("flip = ", flip) - print("scales = ", scales) - - return category_iou, avg_iou, category_acc, avg_acc - -def image_resize(image, height, width): - if image.shape[0] == 3: - image = np.transpose(image, (1, 2, 0)) - image = cv2.resize(image, (width, height), interpolation=cv2.INTER_LINEAR) - return image - -def pad_single_image(image, crop_size): - h, w = image.shape[0:2] - pad_h = crop_size - h if h < crop_size else 0 - pad_w = crop_size - w if w < crop_size else 0 - image = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w, cv2.BORDER_CONSTANT,value=0) - return image - -def mapper_image(image): - # HxWx3 -> 3xHxW -> 1x3xHxW - image_array = np.transpose(image, (2, 0, 1)) - image_array = image_array.astype('float32') - image_array = image_array[np.newaxis, :] - return image_array - -def flip_left_right_image(image): - return cv2.flip(image, 1) - -def get_feed(image): - h, w = image.shape[0:2] - return np.zeros([1, 1, h, w], dtype='int32'), np.zeros([1, 1, h, w], 
dtype='int32') - -def crop_image(image, h0, w0, h1, w1): - return image[h0:h1, w0:w1, :] - -def main(): - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - evaluate(cfg, **args.__dict__) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_cityscapes.sh b/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_cityscapes.sh deleted file mode 100755 index 98e44a0892d2e4be7c9505864d7b459d8a20bbe6..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_cityscapes.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - - -# Deeplabv3_Res101_Cityscapes -# 1.1 Training -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --use_gpu \ - --use_mpio \ - --cfg ./configs/deeplabv3_res101_cityscapes.yaml | tee -a train.log 2>&1 -# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/deeplabv3_res101_cityscapes.yaml -# 1.3 multi-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --multi_scales \ - --cfg ./configs/deeplabv3_res101_cityscapes.yaml - - diff --git a/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_pascalcontext.sh b/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_pascalcontext.sh deleted file mode 100755 index a356a18d6483bf613975e3808c3e63a3db25d543..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/expes/deeplabv3_res101_pascalcontext.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - - -# Deeplabv3_Res101_PascalContext -# 1.1 Training -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --use_gpu \ - --use_mpio \ - --cfg ./configs/deeplabv3_res101_pascalcontext.yaml | tee -a train.log 2>&1 -# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/deeplabv3_res101_pascalcontext.yaml -# 1.3 multi-scale testing 
-CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --multi_scales \ - --cfg ./configs/deeplabv3_res101_pascalcontext.yaml - - - - diff --git a/PaddleCV/Research/SemSegPaddle/expes/glore_res101_cityscapes.sh b/PaddleCV/Research/SemSegPaddle/expes/glore_res101_cityscapes.sh deleted file mode 100755 index 075b0b974da19641b32c8410bcda9c897dabcab0..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/expes/glore_res101_cityscapes.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - - -# GloRe_Res101_Cityscapes -# 1.1 Training -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --use_gpu \ - --use_mpio \ - --cfg ./configs/glore_res101_cityscapes.yaml | tee -a train.log 2>&1 -# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/glore_res101_cityscapes.yaml -# 1.3 multi-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --multi_scales \ - --cfg ./configs/glore_res101_cityscapes.yaml - diff --git a/PaddleCV/Research/SemSegPaddle/expes/glore_res101_pascalcontext.sh b/PaddleCV/Research/SemSegPaddle/expes/glore_res101_pascalcontext.sh deleted file mode 100755 index 452df25133713ac1c864f06dbbe9f21c3e162781..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/expes/glore_res101_pascalcontext.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - - -# GloRe_Res101_PascalContext -:<&1 -! 
-# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/glore_res101_pascalcontext.yaml -:<&1 -# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/pspnet_res101_cityscapes.yaml -# 1.3 multi-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --multi_scales \ - --cfg ./configs/pspnet_res101_cityscapes.yaml diff --git a/PaddleCV/Research/SemSegPaddle/expes/pspnet_res101_pascalcontext.sh b/PaddleCV/Research/SemSegPaddle/expes/pspnet_res101_pascalcontext.sh deleted file mode 100755 index 1959e6e01a0c59c45f30b9e0bb1ec0dbf7c4dde5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/expes/pspnet_res101_pascalcontext.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - - -#PSPNet_Res101_PascalContext -# 1.1 training -CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --use_gpu \ - --cfg ./configs/pspnet_res101_pascalcontext.yaml | tee -a train.log 2>&1 -# 1.2 single-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --cfg ./configs/pspnet_res101_pascalcontext.yaml -# 1.3 multi-scale testing -CUDA_VISIBLE_DEVICES=0 python eval.py --use_gpu \ - --multi_scales \ - --cfg ./configs/pspnet_res101_pascalcontext.yaml diff --git a/PaddleCV/Research/SemSegPaddle/pretrained_model/note.txt b/PaddleCV/Research/SemSegPaddle/pretrained_model/note.txt deleted file mode 100644 index 6580b2565e4d36e172b75abf55e41752620a80ee..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/pretrained_model/note.txt +++ /dev/null @@ -1 +0,0 @@ -please put the pretrained weights of backbone here diff --git a/PaddleCV/Research/SemSegPaddle/snapshots/note.txt b/PaddleCV/Research/SemSegPaddle/snapshots/note.txt deleted file mode 100644 index 9fe80f1163d114bb2bfc9c8033de12576b7a7c3e..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/snapshots/note.txt +++ /dev/null @@ -1 +0,0 @@ -please put the trained model here diff --git 
a/PaddleCV/Research/SemSegPaddle/src/__init__.py b/PaddleCV/Research/SemSegPaddle/src/__init__.py deleted file mode 100644 index a65af8351df2131361501fc0dce51af3b3252313..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . import datasets, models, utils diff --git a/PaddleCV/Research/SemSegPaddle/src/datasets/__init__.py b/PaddleCV/Research/SemSegPaddle/src/datasets/__init__.py deleted file mode 100644 index 7c1c1a7255c4c7fbb58906bf96fd8061b14abc34..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/datasets/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .cityscapes import CityscapesSeg -from .pascal_context import PascalContextSeg -from .ade import AdeSeg -datasets ={ - 'cityscapes': CityscapesSeg, - 'pascalcontext': PascalContextSeg, - 'adechallengedata2016': AdeSeg, -} -def build_dataset(name, **kwargs): - return datasets[name.lower()](**kwargs) - - diff --git a/PaddleCV/Research/SemSegPaddle/src/datasets/ade.py b/PaddleCV/Research/SemSegPaddle/src/datasets/ade.py deleted file mode 100644 index 6b63d8e297ff1ced2705c7b484cff9d3f2b61a35..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/datasets/ade.py +++ /dev/null @@ -1,105 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import sys -import os -import math -import random -import functools -import io -import time -import codecs -import numpy as np -import paddle -import paddle.fluid as fluid -import cv2 -from PIL import Image -import copy - -from src.utils.config import cfg -from src.models.model_builder import ModelPhase -from .baseseg import BaseSeg - - -class AdeSeg(BaseSeg): - def __init__(self, - file_list, - data_dir, - shuffle=False, - mode=ModelPhase.TRAIN, base_size=520, crop_size=520, rand_scale=True): - super(AdeSeg, self).__init__(file_list, data_dir, shuffle, mode, base_size, crop_size, rand_scale) - - def _mask_transform(self, mask): - target = np.array(mask).astype('int32') - 1 - return target - - - - def load_image(self, line, src_dir, mode=ModelPhase.TRAIN): - # original image cv2.imread flag setting - cv2_imread_flag = cv2.IMREAD_COLOR - if cfg.DATASET.IMAGE_TYPE == "rgba": - # If use RBGA 4 channel ImageType, use IMREAD_UNCHANGED flags to - # reserver alpha channel - cv2_imread_flag = cv2.IMREAD_UNCHANGED - #print("line: ", line) - parts = line.strip().split(cfg.DATASET.SEPARATOR) - if len(parts) != 2: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception("File list format incorrect! 
It should be" - " image_name{}label_name\\n".format( - cfg.DATASET.SEPARATOR)) - img_name, grt_name = parts[0], None - else: - img_name, grt_name = parts[0], parts[1] - - img_path = os.path.join(src_dir, img_name) - img = self.cv2_imread(img_path, cv2_imread_flag) - - if grt_name is not None: - grt_path = os.path.join(src_dir, grt_name) - grt = self.pil_imread(grt_path) - else: - grt = None - - if img is None: - raise Exception( - "Empty image, src_dir: {}, img: {} & lab: {}".format( - src_dir, img_path, grt_path)) - - img_height = img.shape[0] - img_width = img.shape[1] - #print('img.shape',img.shape) - if grt is not None: - grt_height = grt.shape[0] - grt_width = grt.shape[1] - - if img_height != grt_height or img_width != grt_width: - raise Exception( - "source img and label img must has the same size") - else: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception( - "Empty image, src_dir: {}, img: {} & lab: {}".format( - src_dir, img_path, grt_path)) - - if len(img.shape) < 3: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - - grt = self._mask_transform(grt) - - return img, grt, img_name, grt_name - diff --git a/PaddleCV/Research/SemSegPaddle/src/datasets/baseseg.py b/PaddleCV/Research/SemSegPaddle/src/datasets/baseseg.py deleted file mode 100644 index 5433342c8c0d54b2b45bd2fd9a4e01e0b135d0b4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/datasets/baseseg.py +++ /dev/null @@ -1,239 +0,0 @@ -from __future__ import print_function -import sys -import os -import math -import random -import functools -import io -import time -import codecs -import numpy as np -import paddle -import paddle.fluid as fluid -import cv2 -import copy -from PIL import Image, ImageOps, ImageFilter, ImageEnhance - -from src.models.model_builder import ModelPhase -from src.utils.config import cfg -from .data_utils import GeneratorEnqueuer - - -class BaseSeg(object): - def __init__(self, file_list, data_dir, shuffle=False, 
mode=ModelPhase.TRAIN, base_size=1024, crop_size=769, rand_scale=True): - self.mode = mode - self.shuffle = shuffle - self.data_dir = data_dir - self.shuffle_seed = 0 - - self.crop_size = crop_size - self.base_size = base_size # short edge when training - self.rand_scale = rand_scale - - # NOTE: Please ensure file list was save in UTF-8 coding format - with codecs.open(file_list, 'r', 'utf-8') as flist: - self.lines = [line.strip() for line in flist] - self.all_lines = copy.deepcopy(self.lines) - if shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - elif shuffle: - np.random.shuffle(self.lines) - self.num_trainers= cfg.NUM_TRAINERS - self.trainer_id=cfg.TRAINER_ID - - def generator(self): - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // cfg.NUM_TRAINERS - self.lines = self.all_lines[num_lines * cfg.TRAINER_ID: num_lines * (cfg.TRAINER_ID + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - for line in self.lines: - yield self.process_image(line, self.data_dir, self.mode) - - def sharding_generator(self, pid=0, num_processes=1): - """ - Use line id as shard key for multiprocess io - It's a normal generator if pid=0, num_processes=1 - """ - for index, line in enumerate(self.lines): - # Use index and pid to shard file list - if index % num_processes == pid: - yield self.process_image(line, self.data_dir, self.mode) - - def batch_reader(self, batch_size): - br = self.batch(self.reader, batch_size) - for batch in br: - yield batch[0], batch[1], batch[2] - - def multiprocess_generator(self, max_queue_size=32, num_processes=8): - # Re-shuffle file list - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // self.num_trainers - self.lines = self.all_lines[num_lines * self.trainer_id: num_lines * 
(self.trainer_id + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - # Create multiple sharding generators according to num_processes for multiple processes - generators = [] - for pid in range(num_processes): - generators.append(self.sharding_generator(pid, num_processes)) - - try: - enqueuer = GeneratorEnqueuer(generators) - enqueuer.start(max_queue_size=max_queue_size, workers=num_processes) - while True: - generator_out = None - while enqueuer.is_running(): - if not enqueuer.queue.empty(): - generator_out = enqueuer.queue.get(timeout=5) - break - else: - time.sleep(0.01) - if generator_out is None: - break - yield generator_out - finally: - if enqueuer is not None: - enqueuer.stop() - - def batch(self, reader, batch_size, is_test=False, drop_last=False): - def batch_reader(is_test=False, drop_last=drop_last): - if is_test: - imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], [] - for img, grt, img_name, valid_shape, org_shape in reader(): - imgs.append(img) - grts.append(grt) - img_names.append(img_name) - valid_shapes.append(valid_shape) - org_shapes.append(org_shape) - if len(imgs) == batch_size: - yield np.array(imgs), np.array( - grts), img_names, np.array(valid_shapes), np.array( - org_shapes) - imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], [] - - if not drop_last and len(imgs) > 0: - yield np.array(imgs), np.array(grts), img_names, np.array( - valid_shapes), np.array(org_shapes) - else: - imgs, labs, ignore = [], [], [] - bs = 0 - for img, lab, ig in reader(): - imgs.append(img) - labs.append(lab) - ignore.append(ig) - bs += 1 - if bs == batch_size: - yield np.array(imgs), np.array(labs), np.array(ignore) - bs = 0 - imgs, labs, ignore = [], [], [] - - if not drop_last and bs > 0: - yield np.array(imgs), np.array(labs), np.array(ignore) - - return batch_reader(is_test, drop_last) - - def load_image(self, line, src_dir, mode=ModelPhase.TRAIN): - raise NotImplemented - - def 
pil_imread(self, file_path): - """read pseudo-color label""" - im = Image.open(file_path) - return np.asarray(im) - - def cv2_imread(self, file_path, flag=cv2.IMREAD_COLOR): - # resolve cv2.imread open Chinese file path issues on Windows Platform. - return cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), flag) - - def normalize_image(self, img): - img = img.transpose((2, 0, 1)).astype('float32') / 255.0 - img_mean = np.array(cfg.MEAN).reshape((len(cfg.MEAN), 1, 1)) - img_std = np.array(cfg.STD).reshape((len(cfg.STD), 1, 1)) - img -= img_mean - img /= img_std - - return img - - def process_image(self, line, data_dir, mode): - """ process_image """ - img, grt, img_name, grt_name = self.load_image( line, data_dir, mode=mode) # img.type: numpy.array, grt.type: numpy.array - if mode == ModelPhase.TRAIN: - # numpy.array convert to PIL.Image - img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) - grt = Image.fromarray(grt.astype('uint8')).convert('L') - - crop_size = self.crop_size - # random scale - if self.rand_scale: - short_size = random.randint(int(self.base_size * cfg.DATAAUG.RAND_SCALE_MIN), int(self.base_size * cfg.DATAAUG.RAND_SCALE_MAX)) - else: - short_size = self.base_size - w, h = img.size - if h > w: - out_w = short_size - out_h = int(1.0 * h / w * out_w) - else: - out_h = short_size - out_w = int(1.0 * w / h * out_h) - img = img.resize((out_w, out_h), Image.BILINEAR) - grt = grt.resize((out_w, out_h), Image.NEAREST) - - # rand flip - if random.random() > 0.5: - img = img.transpose(Image.FLIP_LEFT_RIGHT) - grt = grt.transpose(Image.FLIP_LEFT_RIGHT) - - # padding - if short_size < crop_size: - pad_h = crop_size - out_h if out_h < crop_size else 0 - pad_w = crop_size - out_w if out_w < crop_size else 0 - img = ImageOps.expand(img, border=(pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2), fill=0) - grt = ImageOps.expand(grt, border=(pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2), fill=cfg.DATASET.IGNORE_INDEX) - - # 
random crop - w, h = img.size - x = random.randint(0, w - crop_size) - y = random.randint(0, h - crop_size) - img = img.crop((x, y, x + crop_size, y + crop_size)) - grt = grt.crop((x, y, x + crop_size, y + crop_size)) - - - # gaussian blur - if cfg.DATAAUG_EXTRA: - if random.random() > 0.7: - img = img.filter(ImageFilter.GaussianBlur(radius=random.random())) - - # PIL.Image -> cv2 - img = cv2.cvtColor(np.asarray(img),cv2.COLOR_RGB2BGR) - grt = np.array(grt) - - elif ModelPhase.is_eval(mode): - org_shape = [img.shape[0], img.shape[1]] # 1024 x 2048 for cityscapes - - elif ModelPhase.is_visual(mode): - org_shape = [img.shape[0], img.shape[1]] - #img, grt = resize(img, grt, mode=mode) - valid_shape = [img.shape[0], img.shape[1]] - #img, grt = rand_crop(img, grt, mode=mode) - else: - raise ValueError("Dataset mode={} Error!".format(mode)) - - # Normalize image - img = self.normalize_image(img) - - if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode): - grt = np.expand_dims(np.array(grt).astype('int32'), axis=0) - ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32') - - - if ModelPhase.is_train(mode): - return (img, grt, ignore) - elif ModelPhase.is_eval(mode): - return (img, grt, ignore) - elif ModelPhase.is_visual(mode): - return (img, grt, img_name, valid_shape, org_shape) diff --git a/PaddleCV/Research/SemSegPaddle/src/datasets/cityscapes.py b/PaddleCV/Research/SemSegPaddle/src/datasets/cityscapes.py deleted file mode 100644 index e9487bf40c180b96564342badeea2e7599c56377..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/datasets/cityscapes.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import print_function -import sys -import os -import math -import random -import functools -import io -import time -import codecs -import numpy as np -import paddle -import paddle.fluid as fluid -import cv2 -from PIL import Image -import copy - -from src.utils.config import cfg -from src.models.model_builder import ModelPhase -from 
.baseseg import BaseSeg - - -class CityscapesSeg(BaseSeg): - def __init__(self, file_list, data_dir, shuffle=False, mode=ModelPhase.TRAIN, base_size=1024, crop_size=769, rand_scale=True): - - super(CityscapesSeg, self).__init__(file_list, data_dir, shuffle, mode, base_size, crop_size, rand_scale) - - def load_image(self, line, src_dir, mode=ModelPhase.TRAIN): - # original image cv2.imread flag setting - cv2_imread_flag = cv2.IMREAD_COLOR - if cfg.DATASET.IMAGE_TYPE == "rgba": - # If use RBGA 4 channel ImageType, use IMREAD_UNCHANGED flags to - # reserver alpha channel - cv2_imread_flag = cv2.IMREAD_UNCHANGED - - parts = line.strip().split(cfg.DATASET.SEPARATOR) - if len(parts) != 2: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception("File list format incorrect! It should be image_name {} label_name\\n".format(cfg.DATASET.SEPARATOR)) - img_name, grt_name = parts[0], None - else: - img_name, grt_name = parts[0], parts[1] - - img_path = os.path.join(src_dir, img_name) - img = self.cv2_imread(img_path, cv2_imread_flag) - - if grt_name is not None: - grt_path = os.path.join(src_dir, grt_name) - grt = self.pil_imread(grt_path) - else: - grt = None - - img_height = img.shape[0] - img_width = img.shape[1] - if grt is not None: - grt_height = grt.shape[0] - grt_width = grt.shape[1] - id_to_trainid = [255, 255, 255, 255, 255, - 255, 255, 255, 0, 1, - 255, 255, 2, 3, 4, - 255, 255, 255, 5, 255, - 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, - 255, 255, 16, 17, 18] - grt_ = np.zeros([grt_height, grt_width]) - - for h in range(grt_height): - for w in range(grt_width): - grt_[h][w] = id_to_trainid[int(grt[h][w])+1] - - if img_height != grt_height or img_width != grt_width: - raise Exception("source img and label img must has the same size") - else: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception("Empty image, src_dir: {}, img: {} & lab: {}".format(src_dir, img_path, grt_path)) - - if len(img.shape) < 3: - img = cv2.cvtColor(img, 
class GeneratorEnqueuer(object):
    """
    Multiple generators

    Runs one worker process per generator; each worker pulls items from its
    generator and puts them on a shared, bounded queue.

    Args:
        generators: sequence of generators, indexed by worker id.
        wait_time (float): time to sleep in-between calls to `put()`.
    """

    def __init__(self, generators, wait_time=0.05):
        self.wait_time = wait_time
        self._generators = generators
        self._threads = []
        self._stop_events = []
        self.queue = None
        self._manager = None
        self.workers = 1

    def start(self, workers=1, max_queue_size=16):
        """
        Start worker processes which add data from the generators into the
        queue.

        Args:
            workers (int): number of worker processes
            max_queue_size (int): queue size
                (when full, workers sleep for `wait_time` and retry)
        """

        self.workers = workers

        def data_generator_task(pid):
            """
            Data generator task: feed the queue from generator `pid` until
            the stop event for `pid` is set.
            """

            def task(pid):
                if (self.queue is not None
                        and self.queue.qsize() < max_queue_size):
                    generator_output = next(self._generators[pid])
                    self.queue.put((generator_output))
                else:
                    time.sleep(self.wait_time)

            while not self._stop_events[pid].is_set():
                try:
                    task(pid)
                except Exception:
                    # Any failure, including generator exhaustion, ends
                    # this worker and marks it as stopped.
                    self._stop_events[pid].set()
                    break

        try:
            self._manager = multiprocessing.Manager()
            self.queue = self._manager.Queue(maxsize=max_queue_size)
            for pid in range(self.workers):
                self._stop_events.append(multiprocessing.Event())
                thread = multiprocessing.Process(
                    target=data_generator_task, args=(pid, ))
                thread.daemon = True
                self._threads.append(thread)
                thread.start()
        except BaseException:
            # Clean up whatever was created, then propagate unchanged.
            self.stop()
            raise

    def is_running(self):
        """
        Returns:
            bool: Whether there is still data to consume or a worker that
            has not been stopped. False when nothing has been started.
        """

        # A non-empty queue still counts as running: wait for the consumer.
        # The queue is None before start() and after stop().
        if self.queue is not None and not self.queue.empty():
            return True

        # Iterate the events that actually exist rather than
        # range(self.workers); start() may have failed part-way through.
        return any(not event.is_set() for event in self._stop_events)

    def stop(self, timeout=None):
        """
        Stops running workers and wait for them to exit, if necessary.
        Should be called by the same thread which called `start()`.
        Safe to call when nothing has been started.

        Args:
            timeout(int|None): maximum time to wait on `thread.join()`.
        """
        if self.is_running():
            for event in self._stop_events:
                event.set()

        for thread in self._threads:
            if thread.is_alive():
                thread.join(timeout)
        if self._manager:
            self._manager.shutdown()

        self._threads = []
        self._stop_events = []
        self.queue = None
class PascalContextSeg(BaseSeg):
    """Pascal-Context dataset reader.

    Loads the image/label pair named on one file-list line and shifts every
    label value down by one.
    """

    def __init__(self,
                 file_list,
                 data_dir,
                 shuffle=False,
                 mode=ModelPhase.TRAIN, base_size=520, crop_size=520, rand_scale=True):
        super(PascalContextSeg, self).__init__(file_list, data_dir, shuffle, mode, base_size, crop_size, rand_scale)

    def _mask_transform(self, mask):
        """Return ``mask`` as an int32 array with 1 subtracted from each value."""
        target = np.array(mask).astype('int32') - 1
        return target

    def load_image(self, line, src_dir, mode=ModelPhase.TRAIN):
        """Load one sample described by a file-list line.

        Args:
            line: file-list line, ``image_name<SEPARATOR>label_name``; the
                label part may be missing outside TRAIN/EVAL mode.
            src_dir: directory that image and label names are relative to.
            mode: current ``ModelPhase``; TRAIN and EVAL require a label.

        Returns:
            ``(img, grt, img_name, grt_name)``; ``grt`` is the transformed
            label, or ``None`` when no label was loaded.

        Raises:
            Exception: malformed line in TRAIN/EVAL mode.
        """
        # original image cv2.imread flag setting
        cv2_imread_flag = cv2.IMREAD_COLOR
        if cfg.DATASET.IMAGE_TYPE == "rgba":
            # For 4-channel RGBA input use IMREAD_UNCHANGED so the alpha
            # channel is preserved.
            cv2_imread_flag = cv2.IMREAD_UNCHANGED
        parts = line.strip().split(cfg.DATASET.SEPARATOR)
        if len(parts) != 2:
            if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL:
                raise Exception("File list format incorrect! It should be"
                                " image_name{}label_name\\n".format(
                                    cfg.DATASET.SEPARATOR))
            img_name, grt_name = parts[0], None
        else:
            img_name, grt_name = parts[0], parts[1]

        img_path = os.path.join(src_dir, img_name)
        img = self.cv2_imread(img_path, cv2_imread_flag)

        if grt_name is not None:
            grt_path = os.path.join(src_dir, grt_name)
            grt = self.pil_imread(grt_path)
        else:
            grt = None

        if len(img.shape) < 3:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        # Only transform a label that exists; np.array(None).astype('int32')
        # raises, which used to break label-less (prediction) samples.
        if grt is not None:
            grt = self._mask_transform(grt)
        return img, grt, img_name, grt_name
class HRNet():
    """
    Builder for an HRNet graph on top of paddle.fluid layers.

    Reference:
        Sun, Ke, et al. "Deep High-Resolution Representation Learning for Human Pose Estimation.", In CVPR 2019
    """
    def __init__(self, stride=4, seg_flag=False):
        # With seg_flag set and stride == 4, net() returns the concatenated
        # multi-resolution features instead of building the classifier head.
        self.stride = stride
        self.seg_flag = seg_flag

    def conv_bn_layer(self, input, filter_size, num_filters, stride=1, padding=1, num_groups=1, if_act=True, name=None):
        """Bias-free conv2d followed by batch norm and, if `if_act`, ReLU.

        The `padding` argument is accepted but not used: the convolution is
        always padded with ``(filter_size - 1) // 2``. Parameter names are
        derived from `name`, which therefore must be a string.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=num_groups,
            act=None,
            param_attr=ParamAttr(initializer=MSRA(), name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(input=conv,
                                     param_attr=ParamAttr(name=bn_name + "_scale",
                                                          initializer=fluid.initializer.Constant(1.0)),
                                     bias_attr=ParamAttr(name=bn_name + "_offset",
                                                         initializer=fluid.initializer.Constant(0.0)),
                                     moving_mean_name=bn_name + '_mean',
                                     moving_variance_name=bn_name + '_variance')
        if if_act:
            bn = fluid.layers.relu(bn)
        return bn

    def basic_block(self, input, num_filters, stride=1, downsample=False, name=None):
        """Two 3x3 conv-bn layers plus a residual connection and final ReLU.

        With `downsample` the residual goes through a 1x1 conv-bn first.
        """
        residual = input
        conv = self.conv_bn_layer(input=input, filter_size=3, num_filters=num_filters, stride=stride, name=name + '_conv1')
        conv = self.conv_bn_layer(input=conv, filter_size=3, num_filters=num_filters, if_act=False, name=name + '_conv2')
        if downsample:
            residual = self.conv_bn_layer(input=input, filter_size=1, num_filters=num_filters, if_act=False,
                                          name=name + '_downsample')
        return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')

    def bottleneck_block(self, input, num_filters, stride=1, downsample=False, name=None):
        """1x1 -> 3x3 -> 1x1 conv-bn stack (output has ``num_filters * 4``
        channels) plus a residual connection and final ReLU.

        With `downsample` the residual goes through a 1x1 conv-bn first.
        """
        residual = input
        conv = self.conv_bn_layer(input=input, filter_size=1, num_filters=num_filters, name=name + '_conv1')
        conv = self.conv_bn_layer(input=conv, filter_size=3, num_filters=num_filters, stride=stride, name=name + '_conv2')
        conv = self.conv_bn_layer(input=conv, filter_size=1, num_filters=num_filters * 4, if_act=False,
                                  name=name + '_conv3')
        if downsample:
            residual = self.conv_bn_layer(input=input, filter_size=1, num_filters=num_filters * 4, if_act=False,
                                          name=name + '_downsample')
        return fluid.layers.elementwise_add(x=residual, y=conv, act='relu')

    def fuse_layers(self, x, channels, multi_scale_output=True, name=None):
        """Fuse the branches in `x` into each output branch.

        For output branch i, every later branch j > i is projected with a
        1x1 conv and bilinearly resized to branch i's spatial size; every
        earlier branch j < i is reduced with ``i - j`` stride-2 3x3 convs.
        All contributions are summed and passed through ReLU. Only branch 0
        is produced when `multi_scale_output` is False.
        """
        out = []
        for i in range(len(channels) if multi_scale_output else 1):
            residual = x[i]
            shape = residual.shape
            width = shape[-1]
            height = shape[-2]
            for j in range(len(channels)):
                if j > i:
                    y = self.conv_bn_layer(x[j], filter_size=1, num_filters=channels[i], if_act=False,
                                           name=name + '_layer_' + str(i + 1) + '_' + str(j + 1))
                    y = fluid.layers.resize_bilinear(input=y, out_shape=[height, width])
                    residual = fluid.layers.elementwise_add(x=residual, y=y, act=None)
                elif j < i:
                    y = x[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            # Last reduction step: switch to the target
                            # channel count and skip the activation.
                            y = self.conv_bn_layer(y, filter_size=3, num_filters=channels[i], stride=2, if_act=False,
                                                   name=name + '_layer_' + str(i + 1) + '_' + str(j + 1) + '_' + str(k + 1))
                        else:
                            y = self.conv_bn_layer(y, filter_size=3, num_filters=channels[j], stride=2,
                                                   name=name + '_layer_' + str(i + 1) + '_' + str(j + 1) + '_' + str(k + 1))
                    residual = fluid.layers.elementwise_add(x=residual, y=y, act=None)

            residual = fluid.layers.relu(residual)
            out.append(residual)
        return out

    def branches(self, x, block_num, channels, name=None):
        """Apply `block_num` basic blocks to each branch of `x`."""
        out = []
        for i in range(len(channels)):
            residual = x[i]
            for j in range(block_num):
                residual = self.basic_block(residual, channels[i],
                                            name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))
            out.append(residual)
        return out

    def high_resolution_module(self, x, channels, multi_scale_output=True, name=None):
        """Four basic blocks per branch followed by cross-branch fusion."""
        residual = self.branches(x, 4, channels, name=name)
        out = self.fuse_layers(residual, channels, multi_scale_output=multi_scale_output, name=name)
        return out

    def transition_layer(self, x, in_channels, out_channels, name=None):
        """Adapt the branch list `x` to `out_channels`.

        Existing branches pass through unchanged when their channel count
        already matches, otherwise through a 3x3 conv-bn. Each additional
        output branch is a stride-2 3x3 conv-bn of the last input branch.
        """
        num_in = len(in_channels)
        num_out = len(out_channels)
        out = []
        for i in range(num_out):
            if i < num_in:
                if in_channels[i] != out_channels[i]:
                    residual = self.conv_bn_layer(x[i], filter_size=3, num_filters=out_channels[i],
                                                  name=name + '_layer_' + str(i + 1))
                    out.append(residual)
                else:
                    out.append(x[i])
            else:
                residual = self.conv_bn_layer(x[-1], filter_size=3, num_filters=out_channels[i], stride=2,
                                              name=name + '_layer_' + str(i + 1))
                out.append(residual)
        return out

    def stage(self, x, num_modules, channels, multi_scale_output=True, name=None):
        """Chain `num_modules` high-resolution modules.

        Only the last module honours ``multi_scale_output=False``; all
        earlier modules keep every branch.
        """
        out = x
        for i in range(num_modules):
            if i == num_modules - 1 and multi_scale_output == False:
                out = self.high_resolution_module(out, channels, multi_scale_output=False, name=name + '_' + str(i + 1))
            else:
                out = self.high_resolution_module(out, channels, name=name + '_' + str(i + 1))

        return out

    def layer1(self, input, name=None):
        """Four bottleneck blocks with 64 filters; only the first projects
        its residual."""
        conv = input
        for i in range(4):
            conv = self.bottleneck_block(conv, num_filters=64, downsample=True if i == 0 else False,
                                         name=name + '_' + str(i + 1))
        return conv

    def net(self, input, num_classes=1000):
        """Build the network on `input`.

        Stage widths and module counts are read from ``cfg.MODEL.HRNET``.

        Returns:
            The concatenated, upsampled stage-4 features when
            ``self.seg_flag`` is set and ``self.stride == 4``; otherwise a
            `num_classes`-channel map resized to the input's spatial size.
        """
        channels_2 = cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS
        channels_3 = cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS
        channels_4 = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS

        num_modules_2 = cfg.MODEL.HRNET.STAGE2.NUM_MODULES
        num_modules_3 = cfg.MODEL.HRNET.STAGE3.NUM_MODULES
        num_modules_4 = cfg.MODEL.HRNET.STAGE4.NUM_MODULES

        # Stem: two stride-2 3x3 convs.
        x = self.conv_bn_layer(input=input, filter_size=3, num_filters=64, stride=2, if_act=True, name='layer1_1')
        x = self.conv_bn_layer(input=x, filter_size=3, num_filters=64, stride=2, if_act=True, name='layer1_2')

        la1 = self.layer1(x, name='layer2')
        tr1 = self.transition_layer([la1], [256], channels_2, name='tr1')
        st2 = self.stage(tr1, num_modules_2, channels_2, name='st2')
        tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2')
        st3 = self.stage(tr2, num_modules_3, channels_3, name='st3')
        tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3')
        st4 = self.stage(tr3, num_modules_4, channels_4, name='st4')

        # Upsample every lower-resolution branch to the size of branch 0.
        shape = st4[0].shape
        height, width = shape[-2], shape[-1]
        for idx in range(1, len(st4)):
            st4[idx] = fluid.layers.resize_bilinear(st4[idx], out_shape=[height, width])

        out = fluid.layers.concat(st4, axis=1)
        if self.seg_flag and self.stride == 4:
            return out

        last_channels = sum(channels_4)

        # conv_bn_layer is a method; the bare name raised NameError here.
        out = self.conv_bn_layer(input=out, filter_size=1, num_filters=last_channels, stride=1, if_act=True, name='conv-2')
        out = fluid.layers.conv2d(
            input=out,
            num_filters=num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            param_attr=ParamAttr(initializer=MSRA(), name='conv-1_weights'),
            bias_attr=False)

        out = fluid.layers.resize_bilinear(out, input.shape[2:])

        return out
class MobileNetV2():
    """Builder for a MobileNetV2 graph on top of paddle.fluid layers.

    Each entry of ``bottleneck_params_list`` is a tuple ``(t, c, n, s)``:
    expansion factor, output channels (before scaling), number of repeated
    units, and stride of the first unit.
    """

    def __init__(self, scale=1.0, change_depth=False, output_stride=None):
        # scale multiplies the channel counts in net(); change_depth selects
        # the second table below, which repeats some stages more often.
        self.scale = scale
        self.change_depth = change_depth
        self.bottleneck_params_list = [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ] if change_depth == False else [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 5, 2),
            (6, 64, 7, 2),
            (6, 96, 5, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ]
        self.modify_bottle_params(output_stride)

    def modify_bottle_params(self, output_stride=None):
        """Cap the accumulated stride of the bottleneck stages.

        Walks ``bottleneck_params_list`` while multiplying up the strides
        (starting at 2); once the running product exceeds `output_stride`
        the stage's stride is rewritten to 1. Does nothing when
        `output_stride` is None.

        Raises:
            Exception: `output_stride` is given and is odd.
        """
        if output_stride is not None and output_stride % 2 != 0:
            raise Exception("output stride must to be even number")
        if output_stride is None:
            return
        else:
            stride = 2
            for i, layer_setting in enumerate(self.bottleneck_params_list):
                t, c, n, s = layer_setting
                # The running product keeps growing even after it passes
                # the limit, so every later strided stage is flattened too.
                stride = stride * s
                if stride > output_stride:
                    s = 1
                self.bottleneck_params_list[i] = (t, c, n, s)

    def net(self, input, class_dim=1000, end_points=None, decode_points=None):
        """Build the network on `input`.

        `end_points` and `decode_points` are each a layer count or a list
        of layer counts. When the running layer count hits `decode_points`
        the tensor at that point is stored in ``decode_ends``; when it hits
        `end_points` construction stops early.

        Returns:
            ``(feature, decode_ends)`` on an early exit at `end_points`;
            otherwise only the `class_dim`-way fc output.
        """
        scale = self.scale
        change_depth = self.change_depth
        #if change_depth is True, the new depth is 1.4 times as deep as before.
        bottleneck_params_list = self.bottleneck_params_list
        decode_ends = dict()

        def check_points(count, points):
            # True when `count` equals `points` or is contained in the list.
            if points is None:
                return False
            else:
                if isinstance(points, list):
                    return (True if count in points else False)
                else:
                    return (True if count == points else False)

        #conv1
        input = self.conv_bn_layer(
            input,
            num_filters=int(32 * scale),
            filter_size=3,
            stride=2,
            padding=1,
            if_act=True,
            name='conv1_1')
        layer_count = 1

        #print("node test:", layer_count, input.shape)

        if check_points(layer_count, decode_points):
            decode_ends[layer_count] = input

        if check_points(layer_count, end_points):
            return input, decode_ends

        # bottleneck sequences
        i = 1
        in_c = int(32 * scale)
        for layer_setting in bottleneck_params_list:
            t, c, n, s = layer_setting
            i += 1
            input, depthwise_output = self.invresi_blocks(
                input=input,
                in_c=in_c,
                t=t,
                c=int(c * scale),
                n=n,
                s=s,
                name='conv' + str(i))
            in_c = int(c * scale)
            # Each stage adds its n units to the running layer count.
            layer_count += n

            #print("node test:", layer_count, input.shape)
            # The decode tap stores the depthwise output of the stage's
            # last unit, not the stage output itself.
            if check_points(layer_count, decode_points):
                decode_ends[layer_count] = depthwise_output

            if check_points(layer_count, end_points):
                return input, decode_ends

        #last_conv
        input = self.conv_bn_layer(
            input=input,
            num_filters=int(1280 * scale) if scale > 1.0 else 1280,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            name='conv9')

        input = fluid.layers.pool2d(
            input=input,
            pool_size=7,
            pool_stride=1,
            pool_type='avg',
            global_pooling=True)

        output = fluid.layers.fc(
            input=input,
            size=class_dim,
            param_attr=ParamAttr(name='fc10_weights'),
            bias_attr=ParamAttr(name='fc10_offset'))
        return output

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      if_act=True,
                      name=None,
                      use_cudnn=True):
        """Bias-free conv2d followed by batch norm and, if `if_act`, ReLU6.

        `channels` is accepted but not used. Parameter names are derived
        from `name`, which therefore must be a string.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            return fluid.layers.relu6(bn)
        else:
            return bn

    def shortcut(self, input, data_residual):
        """Element-wise sum of `input` and `data_residual`."""
        return fluid.layers.elementwise_add(input, data_residual)

    def inverted_residual_unit(self,
                               input,
                               num_in_filter,
                               num_filters,
                               ifshortcut,
                               stride,
                               filter_size,
                               padding,
                               expansion_factor,
                               name=None):
        """One inverted-residual unit: 1x1 expand, depthwise conv, 1x1
        linear projection.

        Returns:
            ``(out, depthwise_output)``; `out` is the projection, added to
            `input` when `ifshortcut` is true.
        """
        num_expfilter = int(round(num_in_filter * expansion_factor))

        channel_expand = self.conv_bn_layer(
            input=input,
            num_filters=num_expfilter,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            name=name + '_expand')

        # groups == channels makes this a depthwise convolution; cuDNN is
        # switched off for it.
        bottleneck_conv = self.conv_bn_layer(
            input=channel_expand,
            num_filters=num_expfilter,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            num_groups=num_expfilter,
            if_act=True,
            name=name + '_dwise',
            use_cudnn=False)

        depthwise_output = bottleneck_conv

        linear_out = self.conv_bn_layer(
            input=bottleneck_conv,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            if_act=False,
            name=name + '_linear')

        if ifshortcut:
            out = self.shortcut(input=input, data_residual=linear_out)
            return out, depthwise_output
        else:
            return linear_out, depthwise_output

    def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
        """Stack `n` inverted-residual units.

        The first unit uses stride `s` and no shortcut; the remaining
        ``n - 1`` units use stride 1 with a shortcut.

        Returns:
            ``(output, depthwise_output)`` of the last unit.
        """
        first_block, depthwise_output = self.inverted_residual_unit(
            input=input,
            num_in_filter=in_c,
            num_filters=c,
            ifshortcut=False,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t,
            name=name + '_1')

        last_residual_block = first_block
        last_c = c

        for i in range(1, n):
            last_residual_block, depthwise_output = self.inverted_residual_unit(
                input=last_residual_block,
                num_in_filter=last_c,
                num_filters=c,
                ifshortcut=True,
                stride=1,
                filter_size=3,
                padding=1,
                expansion_factor=t,
                name=name + '_' + str(i + 1))
        return last_residual_block, depthwise_output
class ResNet():
    """Builder for a ResNet graph (18/34/50/101/152 layers) on top of
    paddle.fluid layers, with optional dilation and early exits."""

    def __init__(self, layers=50, scale=1.0):
        # scale multiplies the stem and bottleneck channel counts in net().
        self.layers = layers
        self.scale = scale

    def net(self,
            input,
            class_dim=1000,
            end_points=None,
            decode_points=None,
            resize_points=None,
            dilation_dict=None):
        """Build the network on `input`.

        `end_points`, `decode_points` and `resize_points` are each a layer
        count or a list of layer counts. `dilation_dict` maps a block index
        to the dilation rate used in that block (default 1).

        Returns:
            ``(feature, decode_ends)`` on an early exit at `end_points`;
            otherwise only the `class_dim`-way fc output.
        """
        layers = self.layers
        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        decode_ends = dict()

        def check_points(count, points):
            # True when `count` equals `points` or is contained in the list.
            if points is None:
                return False
            else:
                if isinstance(points, list):
                    return (True if count in points else False)
                else:
                    return (True if count == points else False)

        def get_dilated_rate(dilation_dict, idx):
            # Dilation configured for block `idx`, or 1 when none is set.
            if dilation_dict is None or idx not in dilation_dict:
                return 1
            else:
                return dilation_dict[idx]

        # Number of residual units in each of the four blocks.
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        # stage_1: 3 3x3_Conv
        conv = self.conv_bn_layer(
            input=input,
            num_filters=int(64 * self.scale),
            filter_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        conv = self.conv_bn_layer(
            input=conv,
            num_filters=int(64 * self.scale),
            filter_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        conv = self.conv_bn_layer(
            input=conv,
            num_filters=int(128 * self.scale),
            filter_size=3,
            stride=1,
            act='relu',
            name="conv1_3")

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        layer_count = 1
        if check_points(layer_count, decode_points):
            decode_ends[layer_count] = conv

        if check_points(layer_count, end_points):
            return conv, decode_ends

        if layers >= 50:
            # Bottleneck variant; each unit adds 3 to the layer count.
            for block in range(len(depth)):
                for i in range(depth[block]):  #depth = [3, 4, 23, 3]
                    # The third block of the 101/152 variants is named
                    # "a", "b1", "b2", ...; everything else "a", "b", "c", ...
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    dilation_rate = get_dilated_rate(dilation_dict, block)
                    # added by Rosun, employ multi-grid
                    if cfg.MODEL.BACKBONE_MULTI_GRID== True and block==3:
                        if i==0:
                            dilation_rate = dilation_rate*(i+1)
                        else:
                            dilation_rate = dilation_rate*(2*i) # 2, 4
                        print("employ multi-grid for resnet backbone network: dilation_rate={}\n".format(dilation_rate))

                    # A dilated unit keeps stride 1 instead of downsampling.
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=int(num_filters[block] * self.scale),
                        stride=2
                        if i == 0 and block != 0 and dilation_rate == 1 else 1,
                        name=conv_name,
                        dilation=dilation_rate)
                    layer_count += 3

                    if check_points(layer_count, decode_points):
                        decode_ends[layer_count] = conv

                    if check_points(layer_count, end_points):
                        return conv, decode_ends

                    # Optionally halve the spatial size (rounded up).
                    if check_points(layer_count, resize_points):
                        conv = self.interp(
                            conv,
                            np.ceil(
                                np.array(conv.shape[2:]).astype('int32') / 2))

            pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(input=pool,
                                  size=class_dim,
                                  param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv)))
        else:
            # Basic-block variant; each unit adds 2 to the layer count.
            # This path applies neither self.scale nor dilation.
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        is_first=block == i == 0,
                        name=conv_name)
                    layer_count += 2
                    if check_points(layer_count, decode_points):
                        decode_ends[layer_count] = conv

                    if check_points(layer_count, end_points):
                        return conv, decode_ends

            pool = fluid.layers.pool2d(
                input=conv, pool_size=7, pool_type='avg', global_pooling=True)
            stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
            out = fluid.layers.fc(
                input=pool,
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
        return out

    def zero_padding(self, input, padding):
        """Pad `input` with `padding` on the last two dimensions."""
        return fluid.layers.pad(
            input, [0, 0, 0, 0, padding, padding, padding, padding])

    def interp(self, input, out_shape):
        """Bilinearly resize `input` to `out_shape` (a numpy array, cast to
        int32)."""
        out_shape = list(out_shape.astype("int32"))
        return fluid.layers.resize_bilinear(input, out_shape=out_shape)

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      dilation=1,
                      groups=1,
                      act=None,
                      name=None):
        """Bias-free conv2d followed by batch norm with activation `act`.

        When `dilation` is not 1 the convolution itself applies no padding;
        bottleneck_block pads beforehand via zero_padding. Parameter names
        are derived from `name`, which therefore must be a string.
        """

        bias_attr=False

        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2 if dilation == 1 else 0,
            dilation=dilation,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=bias_attr,
            name=name + '.conv2d.output.1')

        # Batch-norm name: "bn_conv1" for "conv1", otherwise "bn" plus the
        # layer name with its first three characters dropped.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(input=conv,
                                       act=act,
                                       name=bn_name + '.output.1',
                                       param_attr=ParamAttr(name=bn_name + '_scale'),
                                       bias_attr=ParamAttr(bn_name + '_offset'),
                                       moving_mean_name=bn_name + '_mean',
                                       moving_variance_name=bn_name + '_variance', )

    def shortcut(self, input, ch_out, stride, is_first, name):
        """Return `input` unchanged, or a 1x1 conv-bn projection when the
        channel count or stride differs or `is_first` is set."""
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first == True:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, dilation=1):
        """1x1 -> 3x3 -> 1x1 conv-bn stack (output has ``num_filters * 4``
        channels) added to the shortcut, with a final ReLU."""
        # The 101-layer variant puts the stride on the 3x3 conv; all other
        # depths put it on the first 1x1 conv.
        if self.layers == 101:
            strides = [1, stride]
        else:
            strides = [stride, 1]

        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            dilation=1,
            stride=strides[0],
            act='relu',
            name=name + "_branch2a")
        # Explicit padding, because conv_bn_layer applies none when dilated.
        if dilation > 1:
            conv0 = self.zero_padding(conv0, dilation)
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            dilation=dilation,
            stride=strides[1],
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            dilation=1,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            is_first=False,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(
            x=short, y=conv2, act='relu', name=name + ".add.output.5")

    def basic_block(self, input, num_filters, stride, is_first, name):
        """Two 3x3 conv-bn layers added to the shortcut, with a final ReLU."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
class Xception():
    """Builder for an Xception graph (41/65/71 variants) on top of the
    project's `model_libs` helpers.

    ``bottleneck_params`` holds, for each of "entry_flow", "middle_flow"
    and "exit_flow", a tuple ``(block_num, strides, channels)``.
    """

    def __init__(self, backbone="xception_65"):
        self.bottleneck_params = self.gen_bottleneck_params(backbone)
        self.backbone = backbone

    def gen_bottleneck_params(self, backbone='xception_65'):
        """Return the flow configuration for `backbone`.

        Raises:
            Exception: `backbone` is not one of the supported variants.
        """
        if backbone == 'xception_65':
            bottleneck_params = {
                "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
                "middle_flow": (16, 1, 728),
                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536,
                                                              2048]])
            }
        elif backbone == 'xception_41':
            bottleneck_params = {
                "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
                "middle_flow": (8, 1, 728),
                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536,
                                                              2048]])
            }
        elif backbone == 'xception_71':
            bottleneck_params = {
                "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
                "middle_flow": (16, 1, 728),
                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536,
                                                              2048]])
            }
        else:
            raise Exception(
                "xception backbont only support xception_41/xception_65/xception_71"
            )
        return bottleneck_params

    def net(self,
            input,
            output_stride=32,
            num_classes=1000,
            end_points=None,
            decode_points=None):
        """Build the network on `input`.

        `end_points` and `decode_points` are each a block count or a list
        of block counts. Blocks whose count matches `decode_points` have
        their second separable-conv output stored in ``self.short_cuts``.
        Strides that would push the accumulated stride beyond
        `output_stride` are replaced by 1.

        Returns:
            ``(feature, self.short_cuts)`` when the block count after a
            flow matches `end_points`; otherwise only the softmax fc
            output.
        """
        # Running state shared by the three flows.
        self.stride = 2
        self.block_point = 0
        self.output_stride = output_stride
        self.decode_points = decode_points
        self.short_cuts = dict()
        with scope(self.backbone):
            # Entry flow
            data = self.entry_flow(input)
            if check_points(self.block_point, end_points):
                return data, self.short_cuts

            # Middle flow
            data = self.middle_flow(data)
            if check_points(self.block_point, end_points):
                return data, self.short_cuts

            # Exit flow
            data = self.exit_flow(data)
            if check_points(self.block_point, end_points):
                return data, self.short_cuts

            data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True)
            data = fluid.layers.dropout(data, 0.5)
            stdv = 1.0 / math.sqrt(data.shape[1] * 1.0)
            with scope("logit"):
                out = fluid.layers.fc(
                    input=data,
                    size=num_classes,
                    act='softmax',
                    param_attr=fluid.param_attr.ParamAttr(
                        name='weights',
                        initializer=fluid.initializer.Uniform(-stdv, stdv)),
                    bias_attr=fluid.param_attr.ParamAttr(name='bias'))

        return out

    def entry_flow(self, data):
        """Two stem convolutions followed by the entry-flow blocks.

        Updates ``self.stride`` and ``self.block_point``.
        """
        param_attr = fluid.ParamAttr(
            name=name_scope + 'weights',
            regularizer=None,
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09))
        with scope("entry_flow"):
            with scope("conv1"):
                data = bn_relu(
                    conv(
                        data, 32, 3, stride=2, padding=1,
                        param_attr=param_attr))
            with scope("conv2"):
                data = bn_relu(
                    conv(
                        data, 64, 3, stride=1, padding=1,
                        param_attr=param_attr))

        # get entry flow params
        block_num = self.bottleneck_params["entry_flow"][0]
        strides = self.bottleneck_params["entry_flow"][1]
        chns = self.bottleneck_params["entry_flow"][2]
        strides = check_data(strides, block_num)
        chns = check_data(chns, block_num)

        # params to control your flow
        s = self.stride
        block_point = self.block_point
        output_stride = self.output_stride
        with scope("entry_flow"):
            for i in range(block_num):
                block_point = block_point + 1
                with scope("block" + str(i + 1)):
                    # Drop the stride once it would exceed output_stride.
                    stride = strides[i] if check_stride(s * strides[i],
                                                        output_stride) else 1
                    data, short_cuts = self.xception_block(
                        data, chns[i], [1, 1, stride])
                    s = s * stride
                    if check_points(block_point, self.decode_points):
                        self.short_cuts[block_point] = short_cuts[1]

        self.stride = s
        self.block_point = block_point
        return data

    def middle_flow(self, data):
        """Middle-flow blocks, whose skip connection is an identity.

        Updates ``self.stride`` and ``self.block_point``.
        """
        block_num = self.bottleneck_params["middle_flow"][0]
        strides = self.bottleneck_params["middle_flow"][1]
        chns = self.bottleneck_params["middle_flow"][2]
        strides = check_data(strides, block_num)
        chns = check_data(chns, block_num)

        # params to control your flow
        s = self.stride
        block_point = self.block_point
        output_stride = self.output_stride
        with scope("middle_flow"):
            for i in range(block_num):
                block_point = block_point + 1
                with scope("block" + str(i + 1)):
                    stride = strides[i] if check_stride(s * strides[i],
                                                        output_stride) else 1
                    # Pass the output-stride-limited stride, as entry_flow
                    # and exit_flow do, so the block matches the stride
                    # bookkeeping in `s`. The raw strides[i] was passed
                    # before; the shipped configs all use 1 here.
                    data, short_cuts = self.xception_block(
                        data, chns[i], [1, 1, stride], skip_conv=False)
                    s = s * stride
                    if check_points(block_point, self.decode_points):
                        self.short_cuts[block_point] = short_cuts[1]

        self.stride = s
        self.block_point = block_point
        return data

    def exit_flow(self, data):
        """The two exit-flow blocks; the second is dilated, has no skip
        connection and applies ReLU inside its separable convolutions.

        Updates ``self.stride`` and ``self.block_point``.
        """
        block_num = self.bottleneck_params["exit_flow"][0]
        strides = self.bottleneck_params["exit_flow"][1]
        chns = self.bottleneck_params["exit_flow"][2]
        strides = check_data(strides, block_num)
        chns = check_data(chns, block_num)

        assert (block_num == 2)
        # params to control your flow
        s = self.stride
        block_point = self.block_point
        output_stride = self.output_stride
        with scope("exit_flow"):
            with scope('block1'):
                block_point += 1
                stride = strides[0] if check_stride(s * strides[0],
                                                    output_stride) else 1
                data, short_cuts = self.xception_block(data, chns[0],
                                                       [1, 1, stride])
                s = s * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]
            with scope('block2'):
                block_point += 1
                stride = strides[1] if check_stride(s * strides[1],
                                                    output_stride) else 1
                data, short_cuts = self.xception_block(
                    data,
                    chns[1], [1, 1, stride],
                    dilation=2,
                    has_skip=False,
                    activation_fn_in_separable_conv=True)
                s = s * stride
                if check_points(block_point, self.decode_points):
                    self.short_cuts[block_point] = short_cuts[1]

        self.stride = s
        self.block_point = block_point
        return data

    def xception_block(self,
                       input,
                       channels,
                       strides=1,
                       filters=3,
                       dilation=1,
                       skip_conv=True,
                       has_skip=True,
                       activation_fn_in_separable_conv=False):
        """Three separable convolutions with an optional skip connection.

        `channels`, `strides` and `filters` are each an int, applied to all
        three convolutions, or a sequence of three values.

        Returns:
            ``(output, results)`` where `results` lists the output of each
            of the three separable convolutions. Without `has_skip` the
            output is the last convolution's result; otherwise it is added
            to `input`, projected by a 1x1 conv-bn when `skip_conv` is set.
        """
        repeat_number = 3
        channels = check_data(channels, repeat_number)
        filters = check_data(filters, repeat_number)
        strides = check_data(strides, repeat_number)
        data = input
        results = []
        for i in range(repeat_number):
            with scope('separable_conv' + str(i + 1)):
                if not activation_fn_in_separable_conv:
                    # ReLU before the separable convolution.
                    data = relu(data)
                    data = separate_conv(
                        data,
                        channels[i],
                        strides[i],
                        filters[i],
                        dilation=dilation)
                else:
                    # ReLU handed to separate_conv instead.
                    data = separate_conv(
                        data,
                        channels[i],
                        strides[i],
                        filters[i],
                        dilation=dilation,
                        act=relu)
                results.append(data)
        if not has_skip:
            return data, results
        if skip_conv:
            param_attr = fluid.ParamAttr(
                name=name_scope + 'weights',
                regularizer=None,
                initializer=fluid.initializer.TruncatedNormal(
                    loc=0.0, scale=0.09))
            with scope('shortcut'):
                skip = bn(
                    conv(
                        input,
                        channels[-1],
                        1,
                        strides[-1],
                        groups=1,
                        padding=0,
                        param_attr=param_attr))
        else:
            skip = input
        return data + skip, results
b/PaddleCV/Research/SemSegPaddle/src/models/libs/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/SemSegPaddle/src/models/libs/model_libs.py b/PaddleCV/Research/SemSegPaddle/src/models/libs/model_libs.py deleted file mode 100644 index cae973a2262fbcdd58c5eda85c0a8d981bfd98fe..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/models/libs/model_libs.py +++ /dev/null @@ -1,219 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
@contextlib.contextmanager
def scope(name):
    """Temporarily append ``name + '/'`` to the module-level ``name_scope``.

    The previous value is restored when the ``with`` block exits, including
    when the block raises, so a failure while building one layer does not
    leave a stale prefix behind for everything built afterwards.
    """
    global name_scope
    bk = name_scope
    name_scope = name_scope + name + '/'
    try:
        yield
    finally:
        # Restore even on error; without this an exception in the body would
        # leave the extended prefix in place permanently.
        name_scope = bk
def bn_zero(*args, **kargs):
    """Batch norm whose scale (gamma) and shift (beta) start at zero.

    Positional and keyword arguments are forwarded to
    ``fluid.layers.batch_norm``; epsilon and momentum come from ``cfg.MODEL``.

    Raises:
        Exception: if ``cfg.MODEL.DEFAULT_NORM_TYPE`` is not ``'bn'``.  This
            used to fall through and return ``None`` silently, which only
            surfaced later as an unrelated failure in the caller.
    """
    if cfg.MODEL.DEFAULT_NORM_TYPE == 'bn':
        with scope('BatchNormZeroInit'):
            return fluid.layers.batch_norm(
                *args,
                epsilon=cfg.MODEL.DEFAULT_EPSILON,
                momentum=cfg.MODEL.BN_MOMENTUM,
                param_attr=fluid.ParamAttr(
                    name=name_scope + 'gamma',
                    regularizer=bn_regularizer,
                    initializer=fluid.initializer.ConstantInitializer(
                        value=0.0)),
                bias_attr=fluid.ParamAttr(
                    name=name_scope + 'beta',
                    regularizer=bn_regularizer,
                    initializer=fluid.initializer.ConstantInitializer(
                        value=0.0)),
                moving_mean_name=name_scope + 'moving_mean',
                moving_variance_name=name_scope + 'moving_variance',
                **kargs)
    raise Exception("bn_zero only supports norm type 'bn', got: " +
                    str(cfg.MODEL.DEFAULT_NORM_TYPE))
def conv1d(x, output_channels, name_scope, bias_attr=False):
    '''
    1-D convolution with kernel size 1, implemented through conv2d.

    x: B, C, N
    reshape to 4D --> conv2d --> reshape to 3D

    Returns a tensor reshaped to [B, output_channels, N].  The reshape target
    previously reused the input channel count C, which is only valid when
    output_channels == C.

    Note: the ``name_scope`` parameter shadows the module-level variable of
    the same name inside this function; it is used both as the scope name and
    as the ``name`` passed to ``conv``.
    '''
    B, C, N = x.shape
    with scope(name_scope):
        x = fluid.layers.reshape(x, shape=[B, C, N, 1])
        # conv() maps a falsy bias_attr to "no bias", so a single call covers
        # both cases the original spelled out separately.
        x = conv(
            x,
            output_channels,
            filter_size=1,
            name=name_scope,
            bias_attr=bias_attr)
        x = fluid.layers.reshape(x, shape=[B, output_channels, N])
    return x
def get_func(func_name):
    """Return a function object looked up by name.

    A name without dots is resolved in this module's globals.  A dotted name
    is split at the last dot: the left part is imported as a module under
    ``src.models`` and the right part is fetched from it as an attribute.

    Returns:
        The resolved object, or ``None`` when ``func_name`` is empty.

    Raises:
        Whatever the lookup raised (for example ``KeyError``, ``ImportError``
        or ``AttributeError``), after printing a short diagnostic.  The
        failure branch used to ``return module``, which was either an
        unbound local (import failed) or the module object instead of a
        function (attribute missing).
    """
    print("func_name: ", func_name)
    if func_name == '':
        return None
    try:
        parts = func_name.split('.')
        # Refers to a function in this module
        if len(parts) == 1:
            return globals()[parts[0]]
        # Otherwise, assume we're referencing a module under src.models
        module_name = 'src.models.' + '.'.join(parts[:-1])
        print("module_name: ", module_name)
        module = importlib.import_module(module_name)
        return getattr(module, parts[-1])
    except Exception:
        print('Failed to find function: {}'.format(func_name))
        raise
fluid.layers.shape(image)[-2:] - mean = np.array(cfg.MEAN).reshape(1, len(cfg.MEAN), 1, 1) - mean = fluid.layers.assign(mean.astype('float32')) - std = np.array(cfg.STD).reshape(1, len(cfg.STD), 1, 1) - std = fluid.layers.assign(std.astype('float32')) - image = (image/255 - mean)/std - image = fluid.layers.resize_bilinear(image, - out_shape=[height, width], align_corners=False, align_mode=0) - else: - image = fluid.layers.data( name='image', shape=image_shape, dtype='float32') - label = fluid.layers.data( name='label', shape=grt_shape, dtype='int32') - mask = fluid.layers.data( name='mask', shape=grt_shape, dtype='int32') - - # use PyReader when doing traning and evaluation - if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - iterable = True if ModelPhase.is_eval(phase) else False - print("iterable: ", iterable) - py_reader = fluid.io.PyReader( - feed_list=[image, label, mask], - capacity=cfg.DATALOADER.BUF_SIZE, - iterable=iterable, - use_double_buffer=True, - return_list=False) - - model_name = map_model_name(cfg.MODEL.MODEL_NAME) - model_func = get_func("modeling." 
+ model_name) - - loss_type = cfg.SOLVER.LOSS - if not isinstance(loss_type, list): - loss_type = list(loss_type) - - # dice_loss或bce_loss只适用两类分割中 - if class_num > 2 and (("dice_loss" in loss_type) or ("bce_loss" in loss_type)): - raise Exception("dice loss and bce loss is only applicable to binary classfication") - - # 在两类分割情况下,当loss函数选择dice_loss或bce_loss的时候,最后logit输出通道数设置为1 - if ("dice_loss" in loss_type) or ("bce_loss" in loss_type): - class_num = 1 - if "softmax_loss" in loss_type: - raise Exception("softmax loss can not combine with dice loss or bce loss") - - logits = model_func(image, class_num) - - # 根据选择的loss函数计算相应的损失函数 - if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - loss_valid = False - avg_loss_list = [] - valid_loss = [] - if "softmax_loss" in loss_type: - avg_loss_list.append(multi_softmax_with_loss(logits, - label, mask,class_num)) - loss_valid = True - valid_loss.append("softmax_loss") - if "dice_loss" in loss_type: - avg_loss_list.append(multi_dice_loss(logits, label, mask)) - loss_valid = True - valid_loss.append("dice_loss") - if "bce_loss" in loss_type: - avg_loss_list.append(multi_bce_loss(logits, label, mask)) - loss_valid = True - valid_loss.append("bce_loss") - if not loss_valid: - raise Exception("SOLVER.LOSS: {} is set wrong. it should " - "include one of (softmax_loss, bce_loss, dice_loss) at least" - " example: ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss']".format(cfg.SOLVER.LOSS)) - - invalid_loss = [x for x in loss_type if x not in valid_loss] - if len(invalid_loss) > 0: - print("Warning: the loss {} you set is invalid. 
def to_int(string, dest="I"):
    """Unpack ``string`` with the ``struct`` format ``dest``; return field 0."""
    fields = struct.unpack(dest, string)
    return fields[0]
def ASPPHead(input, mid_channel, num_classes, output_shape):
    """Atrous Spatial Pyramid Pooling head followed by a 1x1 classifier.

    Five parallel branches are built on ``input``: a global image-pooling
    branch and four ASPP branches ("aspp0" .. "aspp3").  Their outputs are
    concatenated on axis 1, reduced by a 1x1 conv to ``2 * mid_channel``
    channels, passed through dropout, and classified by a 1x1 conv with
    ``num_classes`` outputs that is bilinearly resized to ``output_shape``.

    Raises:
        Exception: if ``cfg.MODEL.BACKBONE_OUTPUT_STRIDE`` is neither 8
            nor 16.
    """
    # Arch of Atrous Spatial Pyramid Pooling Module:
    #
    #        |--> ImagePool + Conv_1x1 + BN + ReLU + bilinear_interp -->|
    #        |--> Conv_1x1 + BN + ReLU ------------------------------->|
    # x ---->|--> AtrousConv_3x3 + BN + ReLU ------------------------->| concat
    #        |--> AtrousConv_3x3 + BN + ReLU ------------------------->|
    #        |--> AtrousConv_3x3 + BN + ReLU ------------------------->|
    #
    # concat --> Conv_1x1 + BN + ReLU --> Dropout --> Conv_1x1

    # Dilation rates of the three atrous branches depend on the stride.
    if cfg.MODEL.BACKBONE_OUTPUT_STRIDE == 16:
        aspp_ratios = [6, 12, 18]
    elif cfg.MODEL.BACKBONE_OUTPUT_STRIDE == 8:
        aspp_ratios = [12, 24, 36]
    else:
        raise Exception("deeplab only support stride 8 or 16")

    param_attr = fluid.ParamAttr(name=name_scope + 'weights', regularizer=None,
        initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
    with scope('ASPPHead'):
        with scope("image_pool"):
            # Global average over H and W, then resized back to input's size.
            image_avg = fluid.layers.reduce_mean(input, [2, 3], keep_dim=True)
            image_avg = bn_relu(conv(image_avg, mid_channel, 1, 1, groups=1, padding=0, param_attr=param_attr))
            image_avg = fluid.layers.resize_bilinear(image_avg, input.shape[2:])

        with scope("aspp0"):
            aspp0 = bn_relu(conv(input, mid_channel, 1, 1, groups=1, padding=0, param_attr=param_attr))
        # NOTE(review): this flag is read from cfg.MODEL.DEEPLAB while the
        # rest of this file reads cfg.MODEL.DEEPLABv3 -- confirm that both
        # config nodes exist.
        with scope("aspp1"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp1 = separate_conv(input, mid_channel, 1, 3, dilation=aspp_ratios[0], act=relu)
            else:
                aspp1 = bn_relu(conv(input, mid_channel, stride=1, filter_size=3, dilation=aspp_ratios[0],
                    padding=aspp_ratios[0], param_attr=param_attr))
        with scope("aspp2"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp2 = separate_conv(input, mid_channel, 1, 3, dilation=aspp_ratios[1], act=relu)
            else:
                aspp2 = bn_relu(conv(input, mid_channel, stride=1, filter_size=3, dilation=aspp_ratios[1],
                    padding=aspp_ratios[1], param_attr=param_attr))
        with scope("aspp3"):
            if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV:
                aspp3 = separate_conv(input, mid_channel, 1, 3, dilation=aspp_ratios[2], act=relu)
            else:
                aspp3 = bn_relu(conv(input, mid_channel, stride=1, filter_size=3, dilation=aspp_ratios[2],
                    padding=aspp_ratios[2], param_attr=param_attr))
        with scope("concat"):
            feat = fluid.layers.concat([image_avg, aspp0, aspp1, aspp2, aspp3], axis=1)
            feat = bn_relu(conv(feat, 2*mid_channel, 1, 1, groups=1, padding=0, param_attr=param_attr))
            feat = fluid.layers.dropout(feat, 0.1)

    # Conv_1x1 + bilinear_upsample
    # NOTE(review): the indentation of this section was not recoverable from
    # the flattened source; it is placed at function level (outside the
    # 'ASPPHead' scope) here.  The nesting affects the scoped parameter
    # names -- confirm against the original file.
    seg_name = "logit"
    with scope(seg_name):
        param_attr = fluid.ParamAttr(name= seg_name+'_weights',
            regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0),
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
        logit = conv(feat, num_classes, filter_size=1, param_attr=param_attr, bias_attr=True, name=seg_name+'_conv')
        logit_interp = fluid.layers.resize_bilinear(logit, out_shape=output_shape, name=seg_name+'_interp')

    return logit_interp
def deeplabv3(input, num_classes):
    """
    Build DeepLabv3 on top of the backbone selected by cfg.MODEL.BACKBONE.

    Chen, Liang-Chieh, et al. "Rethinking atrous convolution for semantic
    image segmentation", arXiv:1706.05587

    Returns the main logit, or ``(logit, aux_logit)`` when
    ``cfg.MODEL.DEEPLABv3.AuxHead`` is set.

    Raises:
        Exception: for an unsupported backbone, or when the auxiliary head is
            requested for a backbone that provides no res4 feature here (only
            the resnet branch does).
    """
    # Only the resnet branch yields an intermediate feature for the aux head.
    res4 = None
    if 'xception' in cfg.MODEL.BACKBONE:
        # The backbone output is the ASPP input.  It was previously bound
        # only to `data`, leaving `res5` undefined for this branch.
        res5, decode_shortcut = xception(input)
    elif 'mobilenet' in cfg.MODEL.BACKBONE:
        res5, decode_shortcut = mobilenetv2(input)
    elif 'resnet' in cfg.MODEL.BACKBONE:
        res5, feat_dict = resnet(input)
        res4 = feat_dict[91]
    elif 'hrnet' in cfg.MODEL.BACKBONE:
        res5 = hrnet(input)
    else:
        raise Exception("deeplabv3 only support xception, mobilenet, resnet, and hrnet backbone")

    use_aux_head = cfg.MODEL.DEEPLABv3.AuxHead
    if use_aux_head and res4 is None:
        # Fail with a clear message instead of a NameError on `res4` later.
        raise Exception("deeplabv3 AuxHead requires a resnet backbone, got: " +
                        str(cfg.MODEL.BACKBONE))

    logit = ASPPHead(res5, mid_channel=256, num_classes=num_classes, output_shape=input.shape[2:])
    if use_aux_head:
        aux_logit = FCNHead(res4, 256, num_classes, input.shape[2:])
        return logit, aux_logit
    return logit
def gcn_module(name_scope, x, num_node, num_state):
    '''
    Two conv1d steps over a 3D tensor x (B, C, N).

    The last two axes are swapped, a biased conv1d with num_node outputs is
    applied, the axes are swapped back, x is added with a ReLU, and a
    bias-free conv1d with num_state outputs produces the result.
    '''
    print(x.shape)
    swapped = fluid.layers.transpose(x, perm=[0, 2, 1])  # B,C,N --> B,N,C
    node_mixed = conv1d(swapped, num_node, name_scope+'_conv1d1', bias_attr=True)
    restored = fluid.layers.transpose(node_mixed, perm=[0, 2, 1])  # back to B,C,N
    residual = fluid.layers.elementwise_add(restored, x, act='relu')
    return conv1d(residual, num_state, name_scope+'_conv1d2', bias_attr=False)
def resnet(input):
    """Build the ResNet backbone for GloRe; return (res5, feat_dict).

    The backbone is cut at layer ``BACKBONE_LAYERS - 1``, stages 2 and 3 use
    dilation factors 2 and 4, and features are collected at decode points 91
    and 100.
    """
    depth_multiplier = cfg.MODEL.GLORE.DEPTH_MULTIPLIER
    num_layers = cfg.MODEL.BACKBONE_LAYERS
    backbone = resnet_backbone(num_layers, depth_multiplier)
    return backbone.net(
        input,
        end_points=num_layers - 1,      # end layer of the resnet backbone
        dilation_dict={2: 2, 3: 4},     # stage index -> dilation factor
        decode_points=[91, 100])
def PSPHead(input, out_features, num_classes, output_shape):
    """Pyramid Scene Parsing head followed by a 1x1 classifier.

    ``input`` is adaptively average-pooled to 1x1, 2x2, 3x3 and 6x6; each
    pooled map goes through a 1x1 conv, BN+ReLU and a bilinear resize back to
    ``input``'s spatial size.  The branches are concatenated with ``input``,
    fused by a 3x3 conv + BN + ReLU, passed through dropout and classified by
    a 1x1 conv whose output is bilinearly resized to ``output_shape``.
    """
    # Arch of Pyramid Scene Parsing Module:
    #
    #        |--> Pool_1x1 + Conv_1x1 + BN + ReLU + bilinear_interp -->|
    #        |--> Pool_2x2 + Conv_1x1 + BN + ReLU + bilinear_interp -->|
    # x ---->|--> Pool_3x3 + Conv_1x1 + BN + ReLU + bilinear_interp -->| concat
    #        |--> Pool_6x6 + Conv_1x1 + BN + ReLU + bilinear_interp -->|
    #        |-------------------------------------------------------->|
    #
    # concat --> Conv_3x3 + BN + ReLU --> Dropout --> Conv_1x1
    cat_layers = []
    sizes = (1,2,3,6)
    # 4 parallel pooling branches
    for size in sizes:
        psp_name = "psp" + str(size)
        with scope(psp_name):
            pool_feat = fluid.layers.adaptive_pool2d(input, pool_size=[size, size], pool_type='avg',
                name=psp_name+'_adapool')
            conv_feat = conv(pool_feat, out_features, filter_size=1, bias_attr=True,
                name= psp_name + '_conv')
            bn_feat = bn(conv_feat, act='relu')
            interp = fluid.layers.resize_bilinear(bn_feat, out_shape=input.shape[2:], name=psp_name+'_interp')
        cat_layers.append(interp)
    # Branches are concatenated in reverse order, after the raw input:
    # [input, psp6, psp3, psp2, psp1].
    cat_layers = [input] + cat_layers[::-1]
    cat = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')

    # Conv_3x3 + BN + ReLU
    psp_end_name = "psp_end"
    with scope(psp_end_name):
        data = conv(cat, out_features, filter_size=3, padding=1, bias_attr=True, name=psp_end_name)
        out = bn(data, act='relu')
    # Dropout
    dropout_out = fluid.layers.dropout(out, dropout_prob=0.1, name="dropout")

    # Conv_1x1 + bilinear_upsample
    # NOTE(review): the indentation of this section was not recoverable from
    # the flattened source; it is placed at function level (outside the
    # 'psp_end' scope) here.  The nesting affects the scoped parameter
    # names -- confirm against the original file.
    seg_name = "logit"
    with scope(seg_name):
        param_attr = fluid.ParamAttr(name= seg_name+'_weights',
            regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0),
            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
        logit = conv(dropout_out, num_classes, filter_size=1, param_attr=param_attr, bias_attr=True, name=seg_name+'_conv')
        logit_interp = fluid.layers.resize_bilinear(logit, out_shape=output_shape, name=seg_name+'_interp')

    return logit_interp
dilation_dict=dilation_dict, - decode_points=decode_points) - return res5, feat_dict - -def hrnet(input): - model = hrnet_backbone(stride=4, seg_flag=True) - feats = model.net(input) - return feats - -def pspnet(input, num_classes): - """ - Reference: - Zhao, Hengshuang, et al. "Pyramid scene parsing network.", In CVPR 2017 - """ - if 'resnet' in cfg.MODEL.BACKBONE: - res5, feat_dict = resnet(input) - res4 = feat_dict[91] - elif 'hrnet' in cfg.MODEL.BACKBONE: - res5 = hrnet(input) - else: - raise Exception("pspnet only support resnet and hrnet backbone") - logit = PSPHead(res5, 512, num_classes, input.shape[2:]) - if cfg.MODEL.PSPNET.AuxHead: - aux_logit = FCNHead(res4, 256, num_classes, input.shape[2:]) - return logit, aux_logit - return logit - diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/__init__.py b/PaddleCV/Research/SemSegPaddle/src/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/collect.py b/PaddleCV/Research/SemSegPaddle/src/utils/collect.py deleted file mode 100644 index c434bf47a443e03dbd4ef352cbf7ceacd152cd4a..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/collect.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""A simple attribute dictionary used for representing configuration options.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import codecs -from ast import literal_eval - -import yaml -import six - - -class SegConfig(dict): - def __init__(self, *args, **kwargs): - super(SegConfig, self).__init__(*args, **kwargs) - self.immutable = False - - def __setattr__(self, key, value, create_if_not_exist=True): - if key in ["immutable"]: - self.__dict__[key] = value - return - - t = self - keylist = key.split(".") - for k in keylist[:-1]: - t = t.__getattr__(k, create_if_not_exist) - - t.__getattr__(keylist[-1], create_if_not_exist) - t[keylist[-1]] = value - - def __getattr__(self, key, create_if_not_exist=True): - if key in ["immutable"]: - return self.__dict__[key] - - if not key in self: - if not create_if_not_exist: - raise KeyError - self[key] = SegConfig() - return self[key] - - def __setitem__(self, key, value): - # - if self.immutable: - raise AttributeError( - 'Attempted to set "{}" to "{}", but SegConfig is immutable'. 
- format(key, value)) - # - if isinstance(value, six.string_types): - try: - value = literal_eval(value) - except ValueError: - pass - except SyntaxError: - pass - super(SegConfig, self).__setitem__(key, value) - - def update_from_segconfig(self, other): - if isinstance(other, dict): - other = SegConfig(other) - assert isinstance(other, SegConfig) - diclist = [("", other)] - while len(diclist): - prefix, tdic = diclist[0] - diclist = diclist[1:] - for key, value in tdic.items(): - key = "{}.{}".format(prefix, key) if prefix else key - if isinstance(value, dict): - diclist.append((key, value)) - continue - try: - self.__setattr__(key, value, create_if_not_exist=False) - except KeyError: - raise KeyError('Non-existent config key: {}'.format(key)) - - def check_and_infer(self): - if self.DATASET.IMAGE_TYPE in ['rgb', 'gray']: - self.DATASET.DATA_DIM = 3 - elif self.DATASET.IMAGE_TYPE in ['rgba']: - self.DATASET.DATA_DIM = 4 - else: - raise KeyError( - 'DATASET.IMAGE_TYPE config error, only support `rgb`, `gray` and `rgba`' - ) - if self.MEAN is not None: - self.DATASET.PADDING_VALUE = [x*255.0 for x in self.MEAN] - """ - if not self.TRAIN_CROP_SIZE: - raise ValueError( - 'TRAIN_CROP_SIZE is empty! Please set a pair of values in format (width, height)' - ) - - if not self.EVAL_CROP_SIZE: - raise ValueError( - 'EVAL_CROP_SIZE is empty! 
Please set a pair of values in format (width, height)' - ) - """ - - # Ensure file list is use UTF-8 encoding - train_sets = codecs.open(self.DATASET.TRAIN_FILE_LIST, 'r', 'utf-8').readlines() - val_sets = codecs.open(self.DATASET.VAL_FILE_LIST, 'r', 'utf-8').readlines() - test_sets = codecs.open(self.DATASET.TEST_FILE_LIST, 'r', 'utf-8').readlines() - self.DATASET.TRAIN_TOTAL_IMAGES = len(train_sets) - self.DATASET.VAL_TOTAL_IMAGES = len(val_sets) - self.DATASET.TEST_TOTAL_IMAGES = len(test_sets) - - if self.MODEL.MODEL_NAME == 'icnet' and \ - len(self.MODEL.MULTI_LOSS_WEIGHT) != 3: - self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16] - - def update_from_list(self, config_list): - if len(config_list) % 2 != 0: - raise ValueError( - "Command line options config format error! Please check it: {}". - format(config_list)) - for key, value in zip(config_list[0::2], config_list[1::2]): - try: - self.__setattr__(key, value, create_if_not_exist=False) - except KeyError: - raise KeyError('Non-existent config key: {}'.format(key)) - - def update_from_file(self, config_file): - with codecs.open(config_file, 'r', 'utf-8') as file: - dic = yaml.load(file, Loader=yaml.FullLoader) - self.update_from_segconfig(dic) - - def set_immutable(self, immutable): - self.immutable = immutable - for value in self.values(): - if isinstance(value, SegConfig): - value.set_immutable(immutable) - - def is_immutable(self): - return self.immutable diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/config.py b/PaddleCV/Research/SemSegPaddle/src/utils/config.py deleted file mode 100644 index 9bec393bc63f9e69e4c4546233075915077835e4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/config.py +++ /dev/null @@ -1,192 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -from __future__ import unicode_literals -from .collect import SegConfig -import numpy as np - -cfg = SegConfig() - -########################## 基本配置 ########################################### -# 均值,图像预处理减去的均值 -#cfg.MEAN = [0.5, 0.5, 0.5] -cfg.MEAN = [0.485, 0.456, 0.406] -# 标准差,图像预处理除以标准差· -cfg.STD = [0.229, 0.224, 0.225] -# 批处理大小 -cfg.TRAIN_BATCH_SIZE_PER_GPU = 2 -cfg.TRAIN_BATCH_SIZE= 8 -cfg.EVAL_BATCH_SIZE= 8 -# 多进程训练总进程数 -cfg.NUM_TRAINERS = 1 -# 多进程训练进程ID -cfg.TRAINER_ID = 0 -########################## 数据载入配置 ####################################### -# 数据载入时的并发数, 建议值8 -cfg.DATALOADER.NUM_WORKERS = 8 -# 数据载入时缓存队列大小, 建议值256 -cfg.DATALOADER.BUF_SIZE = 256 - -########################## 数据集配置 ######################################### -cfg.DATASET.DATASET_NAME = 'cityscapes' -# 数据主目录目录 -cfg.DATASET.DATA_DIR = './data_local/cityscapes/' -# 训练集列表 -cfg.DATASET.TRAIN_FILE_LIST = './data_local/cityscapes/train.list' -# 训练集数量 -cfg.DATASET.TRAIN_TOTAL_IMAGES = 5 -# 验证集列表 -cfg.DATASET.VAL_FILE_LIST = './data_local/cityscapes/val.list' -# 验证数据数量 -cfg.DATASET.VAL_TOTAL_IMAGES = 50 -# 测试数据列表 -cfg.DATASET.TEST_FILE_LIST = './data_local/cityscapes/test.list' -# 测试数据数量 -cfg.DATASET.TEST_TOTAL_IMAGES = 1525 -# Tensorboard 可视化的数据集 -cfg.DATASET.VIS_FILE_LIST = None -# 类别数(需包括背景类) -cfg.DATASET.NUM_CLASSES = 19 -# 输入图像类型, 支持三通道'rgb',四通道'rgba',单通道灰度图'gray' -cfg.DATASET.IMAGE_TYPE = 'rgb' -# 输入图片的通道数 -cfg.DATASET.DATA_DIM = 3 -# 数据列表分割符, 默认为空格 -cfg.DATASET.SEPARATOR = '\t' -# 忽略的像素标签值, 默认为255,一般无需改动 -cfg.DATASET.IGNORE_INDEX = 255 -# 
数据增强是图像的padding值 -cfg.DATASET.PADDING_VALUE = [127.5, 127.5, 127.5] - -########################### 数据增强配置 ###################################### -cfg.DATAAUG.EXTRA = True -cfg.DATAAUG.BASE_SIZE = 1024 -cfg.DATAAUG.CROP_SIZE = 769 -cfg.DATAAUG.RAND_SCALE_MIN = 0.75 -cfg.DATAAUG.RAND_SCALE_MAX = 2.0 - - -########################### 训练配置 ########################################## -# 模型保存路径 -cfg.TRAIN.MODEL_SAVE_DIR = '' -# 预训练模型路径 -cfg.TRAIN.PRETRAINED_MODEL_DIR = '' -# 是否resume,继续训练 -cfg.TRAIN.RESUME_MODEL_DIR = '' -# 是否使用多卡间同步BatchNorm均值和方差 -cfg.TRAIN.SYNC_BATCH_NORM = True -# 模型参数保存的epoch间隔数,可用来继续训练中断的模型 -cfg.TRAIN.SNAPSHOT_EPOCH = 10 - -########################### 模型优化相关配置 ################################## -# 初始学习率 -cfg.SOLVER.LR = 0.001 -# 学习率下降方法, 支持poly piecewise cosine 三种 -cfg.SOLVER.LR_POLICY = "poly" -# 优化算法, 支持SGD和Adam两种算法 -cfg.SOLVER.OPTIMIZER = "sgd" -# 动量参数 -cfg.SOLVER.MOMENTUM = 0.9 -# 二阶矩估计的指数衰减率 -cfg.SOLVER.MOMENTUM2 = 0.999 -# 学习率Poly下降指数 -cfg.SOLVER.POWER = 0.9 -# step下降指数 -cfg.SOLVER.GAMMA = 0.1 -# step下降间隔 -cfg.SOLVER.DECAY_EPOCH = [10, 20] -# 学习率权重衰减,0-1 -#cfg.SOLVER.WEIGHT_DECAY = 0.0001 -cfg.SOLVER.WEIGHT_DECAY = 0.00004 -# 训练开始epoch数,默认为1 -cfg.SOLVER.BEGIN_EPOCH = 1 -# 训练epoch数,正整数 -cfg.SOLVER.NUM_EPOCHS = 30 -# loss的选择,支持softmax_loss, bce_loss, dice_loss -cfg.SOLVER.LOSS = ["softmax_loss"] -# 是否开启warmup学习策略 -cfg.SOLVER.LR_WARMUP = False -# warmup的迭代次数 -cfg.SOLVER.LR_WARMUP_STEPS = 2000 - -########################## 测试配置 ########################################### -# 测试模型路径 -cfg.TEST.TEST_MODEL = '' -cfg.TEST.BASE_SIZE = 2048 -cfg.TEST.CROP_SIZE = 769 -cfg.TEST.SLIDE_WINDOW = True - -########################## 模型通用配置 ####################################### -# 模型名称, 支持pspnet, deeplabv3, glore, ginet -cfg.MODEL.MODEL_NAME = '' -# BatchNorm类型: bn、gn(group_norm) -cfg.MODEL.DEFAULT_NORM_TYPE = 'bn' -# 多路损失加权值 -cfg.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4] -# DEFAULT_NORM_TYPE为gn时group数 -cfg.MODEL.DEFAULT_GROUP_NUMBER = 32 -# 极小值, 防止分母除0溢出,一般无需改动 
-cfg.MODEL.DEFAULT_EPSILON = 1e-5 -# BatchNorm动量, 一般无需改动 -cfg.MODEL.BN_MOMENTUM = 0.99 -# 是否使用FP16训练 -cfg.MODEL.FP16 = False -# 混合精度训练需对LOSS进行scale, 默认为动态scale,静态scale可以设置为512.0 -cfg.MODEL.SCALE_LOSS = "DYNAMIC" -# backbone network, (resnet, hrnet, xception_65, mobilenetv2) -cfg.MODEL.BACKBONE= "resnet" -# backbone_layer: 101 and 50 for resnet -cfg.MODEL.BACKBONE_LAYERS=101 -# strides= input.size / feature_maps.size -cfg.MODEL.BACKBONE_OUTPUT_STRIDE=8 -cfg.MODEL.BACKBONE_MULTI_GRID = False - - - -########################## PSPNET模型配置 ###################################### -# RESNET backbone scale 设置 -cfg.MODEL.PSPNET.DEPTH_MULTIPLIER = 1 -# Aux loss -cfg.MODEL.PSPNET.AuxHead= True - - -########################## GloRe模型配置 ###################################### -# RESNET backbone scale 设置 -cfg.MODEL.GLORE.DEPTH_MULTIPLIER = 1 -# Aux loss -cfg.MODEL.GLORE.AuxHead= True - -########################## DeepLabv3模型配置 #################################### -# MobileNet v2 backbone scale 设置 -cfg.MODEL.DEEPLABv3.DEPTH_MULTIPLIER = 1.0 -# ASPP是否使用可分离卷积 -cfg.MODEL.DEEPLABv3.ASPP_WITH_SEP_CONV = True -cfg.MODEL.DEEPLABv3.AuxHead= True - - - -########################## HRNET模型配置 ###################################### -# HRNET STAGE2 设置 -cfg.MODEL.HRNET.STAGE2.NUM_MODULES = 1 -cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS = [40, 80] -# HRNET STAGE3 设置 -cfg.MODEL.HRNET.STAGE3.NUM_MODULES = 4 -cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160] -# HRNET STAGE4 设置 -cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3 -cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320] - - diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/dist_utils.py b/PaddleCV/Research/SemSegPaddle/src/utils/dist_utils.py deleted file mode 100755 index 64c8800fd2010d4e1e5def6cc4ea2e1ad673b4a3..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/dist_utils.py +++ /dev/null @@ -1,92 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import paddle.fluid as fluid - - -def nccl2_prepare(args, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - - envs = args.dist_env - - t.transpile( - envs["trainer_id"], - trainers=','.join(envs["trainer_endpoints"]), - current_endpoint=envs["current_endpoint"], - startup_program=startup_prog, - program=main_prog) - - -def pserver_prepare(args, train_prog, startup_prog): - config = fluid.DistributeTranspilerConfig() - config.slice_var_up = args.split_var - t = fluid.DistributeTranspiler(config=config) - envs = args.dist_env - training_role = envs["training_role"] - - t.transpile( - envs["trainer_id"], - program=train_prog, - pservers=envs["pserver_endpoints"], - trainers=envs["num_trainers"], - sync_mode=not args.async_mode, - startup_program=startup_prog) - if training_role == "PSERVER": - pserver_program = t.get_pserver_program(envs["current_endpoint"]) - pserver_startup_program = t.get_startup_program( - envs["current_endpoint"], - pserver_program, - startup_program=startup_prog) - return pserver_program, pserver_startup_program - elif training_role == "TRAINER": - train_program = t.get_trainer_program() - return train_program, startup_prog - else: - raise ValueError( - 'PADDLE_TRAINING_ROLE environment 
variable must be either TRAINER or PSERVER' - ) - - -def nccl2_prepare_paddle(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - - -def prepare_for_multi_process(exe, build_strategy, train_prog): - # prepare for multi-process - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: return - - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - # NOTE(zcd): use multi processes to train the model, - # and each process use one GPU card. - startup_prog = fluid.Program() - nccl2_prepare_paddle(trainer_id, startup_prog, train_prog) - # the startup_prog are run two times, but it doesn't matter. 
- exe.run(startup_prog) diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/fp16_utils.py b/PaddleCV/Research/SemSegPaddle/src/utils/fp16_utils.py deleted file mode 100644 index 38edda500c17aefba4f8c9c59284a40c03c99843..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/fp16_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -import os -from paddle import fluid - -def load_fp16_vars(executor, dirname, program): - load_dirname = os.path.normpath(dirname) - - def _if_exist(var): - name = var.name[:-7] if var.name.endswith('.master') else var.name - b = os.path.exists(os.path.join(load_dirname, name)) - if not b and isinstance(var, fluid.framework.Parameter): - print("===== {} not found ====".format(var.name)) - return b - - load_prog = fluid.Program() - load_block = load_prog.global_block() - vars = list(filter(_if_exist, program.list_vars())) - - for var in vars: - new_var = fluid.io._clone_var_in_block_(load_block, var) - name = var.name[:-7] if var.name.endswith('.master') else var.name - file_path = os.path.join(load_dirname, name) - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [new_var]}, - attrs={ - 'file_path': file_path, - 'load_as_fp16': var.dtype == fluid.core.VarDesc.VarType.FP16 - }) - - executor.run(load_prog) \ No newline at end of file diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/loss.py b/PaddleCV/Research/SemSegPaddle/src/utils/loss.py deleted file mode 100644 index 6bb6e98332912770b794ee6a84d849fef81773d6..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/loss.py +++ /dev/null @@ -1,121 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle.fluid as fluid -import numpy as np -import importlib -from src.utils.config import cfg - - -def softmax_with_loss(logit, label, ignore_mask=None, num_classes=2): - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - label = fluid.layers.elementwise_min( label, fluid.layers.assign(np.array([num_classes - 1], dtype=np.int32))) - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.reshape(logit, [-1, num_classes]) - label = fluid.layers.reshape(label, [-1, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.reshape(ignore_mask, [-1, 1]) - - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=cfg.DATASET.IGNORE_INDEX, - return_softmax=True) - - loss = loss * ignore_mask - avg_loss = fluid.layers.mean(loss) / fluid.layers.mean(ignore_mask) - - label.stop_gradient = True - ignore_mask.stop_gradient = True - return avg_loss - -# to change, how to appicate ignore index and ignore mask -def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception("dice loss is only applicable to one channel classfication") - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.transpose(ignore_mask, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit = logit * ignore_mask - label 
= label * ignore_mask - reduce_dim = list(range(1, len(logit.shape))) - inse = fluid.layers.reduce_sum(logit * label, dim=reduce_dim) - dice_denominator = fluid.layers.reduce_sum( - logit, dim=reduce_dim) + fluid.layers.reduce_sum( - label, dim=reduce_dim) - dice_score = 1 - inse * 2 / (dice_denominator + epsilon) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return fluid.layers.reduce_mean(dice_score) - -def bce_loss(logit, label, ignore_mask=None): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception("bce loss is only applicable to binary classfication") - label = fluid.layers.cast(label, 'float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logit, - label=label, - ignore_index=cfg.DATASET.IGNORE_INDEX, - normalize=True) # or False - loss = fluid.layers.reduce_sum(loss) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return loss - - -def multi_softmax_with_loss(logits, label, ignore_mask=None, num_classes=2): - if isinstance(logits, tuple): - print("logits.type: ",type(logits)) - avg_loss = 0 - for i, logit in enumerate(logits): - logit_label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (logit_label.astype('int32') != - cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = softmax_with_loss(logit, logit_label, logit_mask, - num_classes) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = softmax_with_loss(logits, label, ignore_mask, num_classes) - return avg_loss - -def multi_dice_loss(logits, label, ignore_mask=None): - if isinstance(logits, tuple): - avg_loss = 0 - for i, logit in enumerate(logits): - logit_label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (logit_label.astype('int32') != - cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = dice_loss(logit, logit_label, logit_mask) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = dice_loss(logits, label, ignore_mask) - return 
avg_loss - -def multi_bce_loss(logits, label, ignore_mask=None): - if isinstance(logits, tuple): - avg_loss = 0 - for i, logit in enumerate(logits): - logit_label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (logit_label.astype('int32') != - cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = bce_loss(logit, logit_label, logit_mask) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = bce_loss(logits, label, ignore_mask) - return avg_loss diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/metrics.py b/PaddleCV/Research/SemSegPaddle/src/utils/metrics.py deleted file mode 100644 index 2898be028f3dfa03ad9892310da89f7695829542..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/metrics.py +++ /dev/null @@ -1,145 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - - -class ConfusionMatrix(object): - """ - Confusion Matrix for segmentation evaluation - """ - - def __init__(self, num_classes=2, streaming=False): - self.confusion_matrix = np.zeros([num_classes, num_classes], - dtype='int64') - self.num_classes = num_classes - self.streaming = streaming - - def calculate(self, pred, label, ignore=None): - # If not in streaming mode, clear matrix everytime when call `calculate` - if not self.streaming: - self.zero_matrix() - - label = np.transpose(label, (0, 2, 3, 1)) - ignore = np.transpose(ignore, (0, 2, 3, 1)) - mask = np.array(ignore) == 1 - - label = np.asarray(label)[mask] - pred = np.asarray(pred)[mask] - one = np.ones_like(pred) - # Accumuate ([row=label, col=pred], 1) into sparse matrix - spm = csr_matrix((one, (label, pred)), - shape=(self.num_classes, self.num_classes)) - spm = spm.todense() - self.confusion_matrix += spm - - def zero_matrix(self): - """ Clear confusion matrix """ - self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], - dtype='int64') - - def mean_iou(self): - iou_list = [] - avg_iou = 0 - # TODO: use numpy sum axis api to simpliy - vji = np.zeros(self.num_classes, dtype=int) - vij = np.zeros(self.num_classes, dtype=int) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - for c in range(self.num_classes): - total = vji[c] + vij[c] - self.confusion_matrix[c][c] - if total == 0: - iou = 0 - else: - iou = float(self.confusion_matrix[c][c]) / total - avg_iou += iou - iou_list.append(iou) - avg_iou = float(avg_iou) / float(self.num_classes) - return np.array(iou_list), avg_iou - - def accuracy(self): - total = self.confusion_matrix.sum() - total_right = 0 - for c in 
range(self.num_classes): - total_right += self.confusion_matrix[c][c] - if total == 0: - avg_acc = 0 - else: - avg_acc = float(total_right) / total - - vij = np.zeros(self.num_classes, dtype=int) - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - acc_list = [] - for c in range(self.num_classes): - if vij[c] == 0: - acc = 0 - else: - acc = self.confusion_matrix[c][c] / float(vij[c]) - acc_list.append(acc) - return np.array(acc_list), avg_acc - - def kappa(self): - vji = np.zeros(self.num_classes) - vij = np.zeros(self.num_classes) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - total = self.confusion_matrix.sum() - - # avoid spillovers - # TODO: is it reasonable to hard code 10000.0? - total = float(total) / 10000.0 - vji = vji / 10000.0 - vij = vij / 10000.0 - - tp = 0 - tc = 0 - for c in range(self.num_classes): - tp += vji[c] * vij[c] - tc += self.confusion_matrix[c][c] - - tc = tc / 10000.0 - pe = tp / (total * total) - po = tc / total - - kappa = (po - pe) / (1 - pe) - return kappa diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/palette.py b/PaddleCV/Research/SemSegPaddle/src/utils/palette.py deleted file mode 100644 index 16f59602e1cc0b37d5c770df33c820934553c2ff..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/palette.py +++ /dev/null @@ -1,66 +0,0 @@ -def get_cityscapes_palette(num_cls=19): - """ Returns the color map for visualizing the segmentation mask. 
- Args: - num_cls: Number of classes - Returns: - The color map - """ - - palette = [0] * (num_cls * 3) - palette[0:3] = (128, 64, 128) # 0: 'road' - palette[3:6] = (244, 35,232) # 1 'sidewalk' - palette[6:9] = (70, 70, 70) # 2''building' - palette[9:12] = (102,102,156) # 3 wall - palette[12:15] = (190,153,153) # 4 fence - palette[15:18] = (153,153,153) # 5 pole - palette[18:21] = (250,170, 30) # 6 'traffic light' - palette[21:24] = (220,220, 0) # 7 'traffic sign' - palette[24:27] = (107,142, 35) # 8 'vegetation' - palette[27:30] = (152,251,152) # 9 'terrain' - palette[30:33] = ( 70,130,180) # 10 sky - palette[33:36] = (220, 20, 60) # 11 person - palette[36:39] = (255, 0, 0) # 12 rider - palette[39:42] = (0, 0, 142) # 13 car - palette[42:45] = (0, 0, 70) # 14 truck - palette[45:48] = (0, 60,100) # 15 bus - palette[48:51] = (0, 80,100) # 16 train - palette[51:54] = (0, 0,230) # 17 'motorcycle' - palette[54:57] = (119, 11, 32) # 18 'bicycle' - palette[57:60] = (105, 105, 105) - - return palette - - -def get_gene_palette(num_cls=182): #Ref: CCNet - """ Returns the color map for visualizing the segmentation mask. 
- Args: - num_cls: Number of classes - Returns: - The color map - """ - - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - -def get_palette(dataset): - if dataset == 'cityscapes': - palette = get_cityscapes_palette(19) - elif dataset == 'pascalContext': - palette = get_gene_palette(num_cls=59) - else: - raise RuntimeError("unkonw dataset :{}".format(dataset)) - return palette - diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/solver.py b/PaddleCV/Research/SemSegPaddle/src/utils/solver.py deleted file mode 100644 index 62baf9a610244b3a20bf976cec52727ec684ab8b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/solver.py +++ /dev/null @@ -1,159 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import paddle.fluid as fluid -import numpy as np -import importlib -from src.utils.config import cfg -from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecison, decorate, AutoMixedPrecisionLists - - -class Solver(object): - def __init__(self, main_prog, start_prog): - total_images = cfg.DATASET.TRAIN_TOTAL_IMAGES - self.weight_decay = cfg.SOLVER.WEIGHT_DECAY - self.momentum = cfg.SOLVER.MOMENTUM - self.momentum2 = cfg.SOLVER.MOMENTUM2 - self.step_per_epoch = total_images // cfg.TRAIN_BATCH_SIZE - if total_images % cfg.TRAIN_BATCH_SIZE != 0: - self.step_per_epoch += 1 - self.total_step = cfg.SOLVER.NUM_EPOCHS * self.step_per_epoch - self.main_prog = main_prog - self.start_prog = start_prog - self.warmup_step = cfg.SOLVER.LR_WARMUP_STEPS if cfg.SOLVER.LR_WARMUP else -1 - self.decay_step = self.total_step - self.warmup_step - self.decay_epochs = cfg.SOLVER.NUM_EPOCHS - self.warmup_step / self.step_per_epoch - - def lr_warmup(self, learning_rate, start_lr, end_lr): - linear_step = end_lr - start_lr - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate_warmup") - - global_step = fluid.layers.learning_rate_scheduler._decay_step_counter() - warmup_counter = fluid.layers.autoincreased_step_counter( - counter_name='@LR_DECAY_COUNTER_WARMUP_IN_SEG@', begin=1, step=1) - global_counter = fluid.default_main_program().global_block( - ).vars['@LR_DECAY_COUNTER@'] - warmup_counter = fluid.layers.cast(warmup_counter, 'float32') - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(warmup_counter <= self.warmup_step): - decayed_lr = start_lr + linear_step * ( - warmup_counter / self.warmup_step) - fluid.layers.tensor.assign(decayed_lr, lr) - # hold the global_step to 0 during the warm-up phase - fluid.layers.increment(global_counter, value=-1) - with switch.default(): - fluid.layers.tensor.assign(learning_rate, lr) - return lr - - def 
piecewise_decay(self): - gamma = cfg.SOLVER.GAMMA - bd = [self.step_per_epoch * e for e in cfg.SOLVER.DECAY_EPOCH] - lr = [cfg.SOLVER.LR * (gamma**i) for i in range(len(bd) + 1)] - decayed_lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr) - return decayed_lr - - def poly_decay(self): - power = cfg.SOLVER.POWER - decayed_lr = fluid.layers.polynomial_decay( - cfg.SOLVER.LR, self.decay_step, end_learning_rate=0, power=power) - return decayed_lr - - def cosine_decay(self): - decayed_lr = fluid.layers.cosine_decay( - cfg.SOLVER.LR, self.step_per_epoch, self.decay_epochs) - return decayed_lr - - def get_lr(self, lr_policy): - if lr_policy.lower() == 'poly': - decayed_lr = self.poly_decay() - elif lr_policy.lower() == 'piecewise': - decayed_lr = self.piecewise_decay() - elif lr_policy.lower() == 'cosine': - decayed_lr = self.cosine_decay() - else: - raise Exception( - "unsupport learning decay policy! only support poly,piecewise,cosine" - ) - - decayed_lr = self.lr_warmup(decayed_lr, 0, cfg.SOLVER.LR) - return decayed_lr - - def sgd_optimizer(self, lr_policy, loss): - decayed_lr = self.get_lr(lr_policy) - optimizer = fluid.optimizer.Momentum( - learning_rate=decayed_lr, - momentum=self.momentum, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=self.weight_decay), - ) - if cfg.MODEL.FP16: - if cfg.MODEL.MODEL_NAME in ["pspnet"]: - custom_black_list = {"pool2d"} - else: - custom_black_list = {} - amp_lists = AutoMixedPrecisionLists( - custom_black_list=custom_black_list) - assert isinstance(cfg.MODEL.SCALE_LOSS, float) or isinstance(cfg.MODEL.SCALE_LOSS, str), \ - "data type of MODEL.SCALE_LOSS must be float or str" - if isinstance(cfg.MODEL.SCALE_LOSS, float): - optimizer = decorate( - optimizer, - amp_lists=amp_lists, - init_loss_scaling=cfg.MODEL.SCALE_LOSS, - use_dynamic_loss_scaling=False) - else: - assert cfg.MODEL.SCALE_LOSS.lower() in [ - 'dynamic' - ], "if MODEL.SCALE_LOSS is a string,\ - must be set as 'DYNAMIC'!" 
- - optimizer = decorate( - optimizer, - amp_lists=amp_lists, - use_dynamic_loss_scaling=True) - - optimizer.minimize(loss) - return decayed_lr - - def adam_optimizer(self, lr_policy, loss): - decayed_lr = self.get_lr(lr_policy) - optimizer = fluid.optimizer.Adam( - learning_rate=decayed_lr, - beta1=self.momentum, - beta2=self.momentum2, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=self.weight_decay), - ) - optimizer.minimize(loss) - return decayed_lr - - def optimise(self, loss): - lr_policy = cfg.SOLVER.LR_POLICY - opt = cfg.SOLVER.OPTIMIZER - - if opt.lower() == 'adam': - return self.adam_optimizer(lr_policy, loss) - elif opt.lower() == 'sgd': - return self.sgd_optimizer(lr_policy, loss) - else: - raise Exception( - "unsupport optimizer solver, only support adam and sgd") diff --git a/PaddleCV/Research/SemSegPaddle/src/utils/timer.py b/PaddleCV/Research/SemSegPaddle/src/utils/timer.py deleted file mode 100644 index 8e32c343def6e7cab81c6447a090b796d3ce00eb..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/src/utils/timer.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import time - - -def calculate_eta(remaining_step, speed): - if remaining_step < 0: - remaining_step = 0 - remaining_time = int(remaining_step / speed) - result = "{:0>2}:{:0>2}:{:0>2}" - arr = [] - for i in range(2, -1, -1): - arr.append(int(remaining_time / 60**i)) - remaining_time %= 60**i - return result.format(*arr) - - -class Timer(object): - """ Simple timer class for measuring time consuming """ - - def __init__(self): - self._start_time = 0.0 - self._end_time = 0.0 - self._elapsed_time = 0.0 - self._is_running = False - - def start(self): - self._is_running = True - self._start_time = time.time() - - def restart(self): - self.start() - - def stop(self): - self._is_running = False - self._end_time = time.time() - - def elapsed_time(self): - self._end_time = time.time() - self._elapsed_time = self._end_time - self._start_time - if not self.is_running: - return 0.0 - - return self._elapsed_time - - @property - def is_running(self): - return self._is_running diff --git a/PaddleCV/Research/SemSegPaddle/train.py b/PaddleCV/Research/SemSegPaddle/train.py deleted file mode 100644 index e91113e5de996a6f19988fa44fa0c8d32d37620d..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/train.py +++ /dev/null @@ -1,429 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys -import timeit -import argparse -import pprint -import shutil -import functools -import paddle -import numpy as np -import paddle.fluid as fluid - -from src.utils.metrics import ConfusionMatrix -from src.utils.config import cfg -from src.utils.timer import Timer, calculate_eta -from src.utils import dist_utils -from src.datasets import build_dataset -from src.models.model_builder import build_model -from src.models.model_builder import ModelPhase -from src.models.model_builder import 
parse_shape_from_file -from eval import evaluate -from vis import visualize - - -def parse_args(): - parser = argparse.ArgumentParser(description='semseg-paddle') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess I/O or not', - action='store_true', - default=False) - parser.add_argument( - '--log_steps', - dest='log_steps', - help='Display logging information at every log_steps', - default=10, - type=int) - parser.add_argument( - '--debug', - dest='debug', - help='debug mode, display detail information of training', - action='store_true') - parser.add_argument( - '--use_tb', - dest='use_tb', - help='whether to record the data during training to Tensorboard', - action='store_true') - parser.add_argument( - '--tb_log_dir', - dest='tb_log_dir', - help='Tensorboard logging directory', - default=None, - type=str) - parser.add_argument( - '--do_eval', - dest='do_eval', - help='Evaluation models result on every new checkpoint', - action='store_true') - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - return parser.parse_args() - - - - -def save_checkpoint(exe, program, ckpt_name): - """ - Save checkpoint for evaluation or resume training - """ - filename= '{}_{}_{}_epoch_{}.pdparams'.format(str(cfg.MODEL.MODEL_NAME), - str(cfg.MODEL.BACKBONE), str(cfg.DATASET.DATASET_NAME), ckpt_name) - ckpt_dir = cfg.TRAIN.MODEL_SAVE_DIR - - print("Save model checkpoint to {}".format(ckpt_dir)) - if not os.path.isdir(ckpt_dir): - os.makedirs(ckpt_dir) - - fluid.io.save_params(exe, ckpt_dir, program, filename) - return ckpt_dir - - -def load_checkpoint(exe, program): - """ - Load checkpoiont from pretrained model directory for 
resume training - """ - - print('Resume model training from:', cfg.TRAIN.RESUME_MODEL_DIR) - if not os.path.exists(cfg.TRAIN.RESUME_MODEL_DIR): - raise ValueError("TRAIN.PRETRAIN_MODEL {} not exist!".format( - cfg.TRAIN.RESUME_MODEL_DIR)) - - fluid.io.load_persistables( - exe, cfg.TRAIN.RESUME_MODEL_DIR, main_program=program) - - model_path = cfg.TRAIN.RESUME_MODEL_DIR - # Check is path ended by path spearator - if model_path[-1] == os.sep: - model_path = model_path[0:-1] - epoch_name = os.path.basename(model_path) - # If resume model is final model - if epoch_name == 'final': - begin_epoch = cfg.SOLVER.NUM_EPOCHS - # If resume model path is end of digit, restore epoch status - elif epoch_name.isdigit(): - epoch = int(epoch_name) - begin_epoch = epoch + 1 - else: - raise ValueError("Resume model path is not valid!") - print("Model checkpoint loaded successfully!") - - return begin_epoch - - -def print_info(*msg): - if cfg.TRAINER_ID == 0: - print(*msg) - - -def train(cfg): - startup_prog = fluid.Program() - train_prog = fluid.Program() - drop_last = True - dataset = build_dataset(cfg.DATASET.DATASET_NAME, - file_list=cfg.DATASET.TRAIN_FILE_LIST, - mode=ModelPhase.TRAIN, - shuffle=True, - data_dir=cfg.DATASET.DATA_DIR, - base_size= cfg.DATAAUG.BASE_SIZE, crop_size= cfg.DATAAUG.CROP_SIZE, rand_scale=True) - - def data_generator(): - if args.use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - else: - data_gen = dataset.generator() - - batch_data = [] - for b in data_gen: - batch_data.append(b) - if len(batch_data) == (cfg.TRAIN_BATCH_SIZE // cfg.NUM_TRAINERS): - for item in batch_data: - yield item[0], item[1], item[2] - batch_data = [] - # If use sync batch norm strategy, drop last batch if number of samples - # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues - if not cfg.TRAIN.SYNC_BATCH_NORM: - for item in batch_data: - yield item[0], item[1], item[2] - - 
# Get device environment - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() - places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() - - # Get number of GPU - dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) - print_info("#device count: {}".format(dev_count)) - cfg.TRAIN_BATCH_SIZE = dev_count * int(cfg.TRAIN_BATCH_SIZE_PER_GPU) - print_info("#train_batch_size: {}".format(cfg.TRAIN_BATCH_SIZE)) - print_info("#batch_size_per_dev: {}".format(cfg.TRAIN_BATCH_SIZE_PER_GPU)) - - py_reader, avg_loss, lr, pred, grts, masks = build_model( - train_prog, startup_prog, phase=ModelPhase.TRAIN) - py_reader.decorate_sample_generator( - data_generator, batch_size=cfg.TRAIN_BATCH_SIZE_PER_GPU, drop_last=drop_last) - - exe = fluid.Executor(place) - exe.run(startup_prog) - - exec_strategy = fluid.ExecutionStrategy() - # Clear temporary variables every 100 iteration - if args.use_gpu: - exec_strategy.num_threads = fluid.core.get_cuda_device_count() - exec_strategy.num_iteration_per_drop_scope = 100 - build_strategy = fluid.BuildStrategy() - - if cfg.NUM_TRAINERS > 1 and args.use_gpu: - dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) - exec_strategy.num_threads = 1 - - if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: - if dev_count > 1: - # Apply sync batch norm strategy - print_info("Sync BatchNorm strategy is effective.") - build_strategy.sync_batch_norm = True - else: - print_info( - "Sync BatchNorm strategy will not be effective if GPU device" - " count <= 1") - compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( - loss_name=avg_loss.name, - exec_strategy=exec_strategy, - build_strategy=build_strategy) - - # Resume training - begin_epoch = cfg.SOLVER.BEGIN_EPOCH - if cfg.TRAIN.RESUME_MODEL_DIR: - begin_epoch = load_checkpoint(exe, train_prog) - # Load pretrained model - elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): - 
print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) - load_vars = [] - load_fail_vars = [] - - def var_shape_matched(var, shape): - """ - Check whehter persitable variable shape is match with current network - """ - var_exist = os.path.exists( - os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) - if var_exist: - var_shape = parse_shape_from_file( - os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) - return var_shape == shape - return False - - for x in train_prog.list_vars(): - if isinstance(x, fluid.framework.Parameter): - shape = tuple(fluid.global_scope().find_var( - x.name).get_tensor().shape()) - if var_shape_matched(x, shape): - load_vars.append(x) - else: - load_fail_vars.append(x) - - fluid.io.load_vars( - exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars) - for var in load_vars: - print_info("Parameter[{}] loaded sucessfully!".format(var.name)) - for var in load_fail_vars: - print_info( - "Parameter[{}] don't exist or shape does not match current network, skip" - " to load it.".format(var.name)) - print_info("{}/{} pretrained parameters loaded successfully!".format( - len(load_vars), - len(load_vars) + len(load_fail_vars))) - else: - print_info( - 'Pretrained model dir {} not exists, training from scratch...'. 
- format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) - - fetch_list = [avg_loss.name, lr.name] - if args.debug: - # Fetch more variable info and use streaming confusion matrix to - # calculate IoU results if in debug mode - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - fetch_list.extend([pred.name, grts.name, masks.name]) - cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) - - if args.use_tb: - if not args.tb_log_dir: - print_info("Please specify the log directory by --tb_log_dir.") - exit(1) - - from tb_paddle import SummaryWriter - log_writer = SummaryWriter(args.tb_log_dir) - - # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - global_step = 0 - all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.TRAIN_BATCH_SIZE - if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.TRAIN_BATCH_SIZE and drop_last != True: - all_step += 1 - all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) - - avg_loss = 0.0 - timer = Timer() - timer.start() - if begin_epoch > cfg.SOLVER.NUM_EPOCHS: - raise ValueError( - ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( - begin_epoch, cfg.SOLVER.NUM_EPOCHS)) - - if args.use_mpio: - print_info("Use multiprocess reader") - else: - print_info("Use multi-thread reader") - - for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - py_reader.start() - while True: - try: - if args.debug: - # Print category IoU and accuracy to check whether the - # traning process is corresponed to expectation - loss, lr, pred, grts, masks = exe.run( - program=compiled_train_prog, - fetch_list=fetch_list, - return_numpy=True) - cm.calculate(pred, grts, masks) - avg_loss += np.mean(np.array(loss)) - global_step += 1 - - if global_step % args.log_steps == 0: - speed = args.log_steps / timer.elapsed_time() - avg_loss /= args.log_steps - category_acc, mean_acc = cm.accuracy() - category_iou, mean_iou = cm.mean_iou() - - print_info(( - "epoch={}/{} 
step={}/{} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}" - ).format(epoch, cfg.SOLVER.NUM_EPOCHS, global_step, all_step, lr[0], avg_loss, mean_acc, - mean_iou, speed, - calculate_eta(all_step - global_step, speed))) - print_info("Category IoU: ", category_iou) - print_info("Category Acc: ", category_acc) - if args.use_tb: - log_writer.add_scalar('Train/mean_iou', mean_iou, - global_step) - log_writer.add_scalar('Train/mean_acc', mean_acc, - global_step) - log_writer.add_scalar('Train/loss', avg_loss, - global_step) - log_writer.add_scalar('Train/lr', lr[0], - global_step) - log_writer.add_scalar('Train/step/sec', speed, - global_step) - sys.stdout.flush() - avg_loss = 0.0 - cm.zero_matrix() - timer.restart() - else: - # If not in debug mode, avoid unnessary log and calculate - loss, lr = exe.run( - program=compiled_train_prog, - fetch_list=fetch_list, - return_numpy=True) - avg_loss += np.mean(np.array(loss)) - global_step += 1 - - if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0: - avg_loss /= args.log_steps - speed = args.log_steps / timer.elapsed_time() - print(( - "epoch={}/{} step={}/{} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" - ).format(epoch, cfg.SOLVER.NUM_EPOCHS, global_step, all_step, lr[0], avg_loss, speed, - calculate_eta(all_step - global_step, speed))) - if args.use_tb: - log_writer.add_scalar('Train/loss', avg_loss, - global_step) - log_writer.add_scalar('Train/lr', lr[0], - global_step) - log_writer.add_scalar('Train/speed', speed, - global_step) - sys.stdout.flush() - avg_loss = 0.0 - timer.restart() - - except fluid.core.EOFException: - py_reader.reset() - break - except Exception as e: - print(e) - - if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0: - ckpt_dir = save_checkpoint(exe, train_prog, epoch) - - if args.do_eval: - print("Evaluation start") - _, mean_iou, _, mean_acc = evaluate( - cfg=cfg, - ckpt_dir=ckpt_dir, - use_gpu=args.use_gpu, - use_mpio=args.use_mpio) - if args.use_tb: - 
log_writer.add_scalar('Evaluate/mean_iou', mean_iou, - global_step) - log_writer.add_scalar('Evaluate/mean_acc', mean_acc, - global_step) - - # Use Tensorboard to visualize results - if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None: - visualize( - cfg=cfg, - use_gpu=args.use_gpu, - vis_file_list=cfg.DATASET.VIS_FILE_LIST, - vis_dir="visual", - ckpt_dir=ckpt_dir, - log_writer=log_writer) - - # save final model - if cfg.TRAINER_ID == 0: - save_checkpoint(exe, train_prog, 'final') - - -def main(args): - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - - cfg.TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", 0)) - cfg.NUM_TRAINERS = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - - cfg.check_and_infer() - print_info(pprint.pformat(cfg)) - train(cfg) - - -if __name__ == '__main__': - args = parse_args() - start = timeit.default_timer() - main(args) - end = timeit.default_timer() - print("training time: {} h".format(1.0*(end-start)/3600)) diff --git a/PaddleCV/Research/SemSegPaddle/vis.py b/PaddleCV/Research/SemSegPaddle/vis.py deleted file mode 100644 index b32998b79b544da47a93879fa3a733fa2d5b170b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/SemSegPaddle/vis.py +++ /dev/null @@ -1,235 +0,0 @@ -# coding: utf8 -# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys -import argparse -import pprint -import cv2 -import numpy as np -import paddle.fluid as fluid - -from PIL import Image as PILImage -from src.utils.config import cfg -from src.datasets.cityscapes import CityscapesSeg -from src.models.model_builder import build_model -from src.models.model_builder import ModelPhase - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddeSeg visualization tools') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', dest='use_gpu', help='Use gpu or cpu', action='store_true') - parser.add_argument( - '--vis_dir', - dest='vis_dir', - help='visual save dir', - type=str, - default='visual') - parser.add_argument( - '--local_test', - dest='local_test', - help='if in local test mode, only visualize 5 images for testing', - action='store_true') - parser.add_argument( - 'opts', - help='See config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def makedirs(directory): - if not os.path.exists(directory): - os.makedirs(directory) - - -def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. 
- Args: - num_classes: Number of classes - Returns: - The color map - """ - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - - return color_map - - -def to_png_fn(fn): - """ - Append png as filename postfix - """ - directory, filename = os.path.split(fn) - basename, ext = os.path.splitext(filename) - - return basename + ".png" - - -def visualize(cfg, - vis_file_list=None, - use_gpu=False, - vis_dir="visual_predict", - ckpt_dir=None, - log_writer=None, - local_test=False, - **kwargs): - if vis_file_list is None: - vis_file_list = cfg.DATASET.TEST_FILE_LIST - dataset = SegDataset( - file_list=vis_file_list, - mode=ModelPhase.VISUAL, - data_dir=cfg.DATASET.DATA_DIR) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) - # Clone forward graph - test_prog = test_prog.clone(for_test=True) - - # Generator full colormap for maximum 256 classes - color_map = get_color_map_list(256) - - # Get device environment - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - save_dir = os.path.join('visual', vis_dir) - makedirs(save_dir) - - fetch_list = [pred.name] - test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True) - img_cnt = 0 - for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: - pred_shape = (imgs.shape[2], imgs.shape[3]) - pred, = exe.run( - program=test_prog, - feed={'image': imgs}, - fetch_list=fetch_list, - return_numpy=True) - - num_imgs = pred.shape[0] - # TODO: use multi-thread to write images - for i in 
range(num_imgs): - # Add more comments - res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) - img_name = img_names[i] - res_shape = (res_map.shape[0], res_map.shape[1]) - if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]: - res_map = cv2.resize( - res_map, pred_shape, interpolation=cv2.INTER_NEAREST) - valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) - res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] - org_shape = (org_shapes[i, 0], org_shapes[i, 1]) - res_map = cv2.resize( - res_map, (org_shape[1], org_shape[0]), - interpolation=cv2.INTER_NEAREST) - - png_fn = to_png_fn(img_name) - - # colorful segment result visualization - vis_fn = os.path.join(save_dir, png_fn) - dirname = os.path.dirname(vis_fn) - makedirs(dirname) - - pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='P') - pred_mask.putpalette(color_map) - pred_mask.save(vis_fn) - - img_cnt += 1 - print("#{} visualize image path: {}".format(img_cnt, vis_fn)) - - # Use Tensorboard to visualize image - if log_writer is not None: - # Calulate epoch from ckpt_dir folder name - epoch = int(os.path.split(ckpt_dir)[-1]) - print("Tensorboard visualization epoch", epoch) - - pred_mask_np = np.array(pred_mask.convert("RGB")) - log_writer.add_image( - "Predict/{}".format(img_name), - pred_mask_np, - epoch, - dataformats='HWC') - # Original image - # BGR->RGB - img = cv2.imread( - os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1] - log_writer.add_image( - "Images/{}".format(img_name), - img, - epoch, - dataformats='HWC') - # add ground truth (label) images - grt = grts[i] - if grt is not None: - grt = grt[0:valid_shape[0], 0:valid_shape[1]] - grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P') - grt_pil.putpalette(color_map) - grt_pil = grt_pil.resize((org_shape[1], org_shape[0])) - grt = np.array(grt_pil.convert("RGB")) - log_writer.add_image( - "Label/{}".format(img_name), - grt, - epoch, - dataformats='HWC') - - # If in local_test mode, only visualize 5 
images just for testing - # procedure - if local_test and img_cnt >= 5: - break - - -if __name__ == '__main__': - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - visualize(cfg, **args.__dict__) diff --git a/PaddleCV/Research/astar2019/README.md b/PaddleCV/Research/astar2019/README.md deleted file mode 100644 index 6854ce9c8ccb3520a4ae6ca11e5b079350867bcc..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/astar2019/README.md +++ /dev/null @@ -1,26 +0,0 @@ -### 百度之星轻量化检测比赛评测工具 - -数据目录结构如下: - -``` -your/path/coco/ -├── annotations -│   ├── instances_train2017.json -│   ├── instances_val2017.json -| ... -├── train2017 -│   ├── 000000000009.jpg -│   ├── 000000580008.jpg -| ... -├── val2017 -│   ├── 000000000139.jpg -│   ├── 000000000285.jpg -| ... - -``` - -命令示例: -```bash -# Evaluate -python score.py --model_dir your/path/saved_model/ --data_dir your/path/coco/ -``` diff --git a/PaddleCV/Research/astar2019/image_util.py b/PaddleCV/Research/astar2019/image_util.py deleted file mode 100644 index f544187359d56afc04ef0be18f4ef9c6ca442b4e..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/astar2019/image_util.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from PIL import Image, ImageEnhance, ImageDraw -from PIL import ImageFile -import numpy as np -import random -import math - -ImageFile.LOAD_TRUNCATED_IMAGES = True #otherwise IOError raised image file is truncated - - -class sampler(): - def __init__(self, max_sample, max_trial, min_scale, max_scale, - min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap, - max_jaccard_overlap): - self.max_sample = max_sample - self.max_trial = max_trial - self.min_scale = min_scale - self.max_scale = max_scale - self.min_aspect_ratio = min_aspect_ratio - self.max_aspect_ratio = max_aspect_ratio - self.min_jaccard_overlap = min_jaccard_overlap - self.max_jaccard_overlap = max_jaccard_overlap - - -class bbox(): - def __init__(self, xmin, ymin, xmax, ymax): - self.xmin = xmin - self.ymin = ymin - self.xmax = xmax - self.ymax = ymax - - -def bbox_area(src_bbox): - width = src_bbox.xmax - src_bbox.xmin - height = src_bbox.ymax - src_bbox.ymin - return width * height - - -def generate_sample(sampler): - scale = np.random.uniform(sampler.min_scale, sampler.max_scale) - aspect_ratio = np.random.uniform(sampler.min_aspect_ratio, - sampler.max_aspect_ratio) - aspect_ratio = max(aspect_ratio, (scale**2.0)) - aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) - - bbox_width = scale * (aspect_ratio**0.5) - bbox_height = scale / (aspect_ratio**0.5) - xmin_bound = 1 - bbox_width - ymin_bound = 1 - bbox_height - xmin = np.random.uniform(0, xmin_bound) - ymin = np.random.uniform(0, ymin_bound) - xmax = xmin + bbox_width - ymax = ymin + bbox_height - sampled_bbox = bbox(xmin, ymin, xmax, ymax) - return sampled_bbox - - -def jaccard_overlap(sample_bbox, object_bbox): - if sample_bbox.xmin >= object_bbox.xmax or \ - sample_bbox.xmax <= object_bbox.xmin or \ - sample_bbox.ymin >= object_bbox.ymax or \ - sample_bbox.ymax <= object_bbox.ymin: - return 0 - intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin) - intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin) - intersect_xmax = 
min(sample_bbox.xmax, object_bbox.xmax) - intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax) - intersect_size = (intersect_xmax - intersect_xmin) * ( - intersect_ymax - intersect_ymin) - sample_bbox_size = bbox_area(sample_bbox) - object_bbox_size = bbox_area(object_bbox) - overlap = intersect_size / ( - sample_bbox_size + object_bbox_size - intersect_size) - return overlap - - -def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0: - return True - for i in range(len(bbox_labels)): - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - overlap = jaccard_overlap(sample_bbox, object_bbox) - if sampler.min_jaccard_overlap != 0 and \ - overlap < sampler.min_jaccard_overlap: - continue - if sampler.max_jaccard_overlap != 0 and \ - overlap > sampler.max_jaccard_overlap: - continue - return True - return False - - -def generate_batch_samples(batch_sampler, bbox_labels): - sampled_bbox = [] - index = [] - c = 0 - for sampler in batch_sampler: - found = 0 - for i in range(sampler.max_trial): - if found >= sampler.max_sample: - break - sample_bbox = generate_sample(sampler) - if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - sampled_bbox.append(sample_bbox) - found = found + 1 - index.append(c) - c = c + 1 - return sampled_bbox - - -def clip_bbox(src_bbox): - src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0) - src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0) - src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0) - src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0) - return src_bbox - - -def meet_emit_constraint(src_bbox, sample_bbox): - center_x = (src_bbox.xmax + src_bbox.xmin) / 2 - center_y = (src_bbox.ymax + src_bbox.ymin) / 2 - if center_x >= sample_bbox.xmin and \ - center_x <= sample_bbox.xmax and \ - center_y >= sample_bbox.ymin and \ - center_y <= sample_bbox.ymax: - return True - return False - - -def 
transform_labels(bbox_labels, sample_bbox): - proj_bbox = bbox(0, 0, 0, 0) - sample_labels = [] - for i in range(len(bbox_labels)): - sample_label = [] - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - if not meet_emit_constraint(object_bbox, sample_bbox): - continue - sample_width = sample_bbox.xmax - sample_bbox.xmin - sample_height = sample_bbox.ymax - sample_bbox.ymin - proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width - proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height - proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width - proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height - proj_bbox = clip_bbox(proj_bbox) - if bbox_area(proj_bbox) > 0: - sample_label.append(bbox_labels[i][0]) - sample_label.append(float(proj_bbox.xmin)) - sample_label.append(float(proj_bbox.ymin)) - sample_label.append(float(proj_bbox.xmax)) - sample_label.append(float(proj_bbox.ymax)) - #sample_label.append(bbox_labels[i][5]) - sample_label = sample_label + bbox_labels[i][5:] - sample_labels.append(sample_label) - return sample_labels - - -def crop_image(img, bbox_labels, sample_bbox, image_width, image_height): - sample_bbox = clip_bbox(sample_bbox) - xmin = int(sample_bbox.xmin * image_width) - xmax = int(sample_bbox.xmax * image_width) - ymin = int(sample_bbox.ymin * image_height) - ymax = int(sample_bbox.ymax * image_height) - sample_img = img[ymin:ymax, xmin:xmax] - sample_labels = transform_labels(bbox_labels, sample_bbox) - return sample_img, sample_labels - - -def random_brightness(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._brightness_prob: - delta = np.random.uniform(-settings._brightness_delta, - settings._brightness_delta) + 1 - img = ImageEnhance.Brightness(img).enhance(delta) - return img - - -def random_contrast(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._contrast_prob: - delta = 
np.random.uniform(-settings._contrast_delta, - settings._contrast_delta) + 1 - img = ImageEnhance.Contrast(img).enhance(delta) - return img - - -def random_saturation(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._saturation_prob: - delta = np.random.uniform(-settings._saturation_delta, - settings._saturation_delta) + 1 - img = ImageEnhance.Color(img).enhance(delta) - return img - - -def random_hue(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._hue_prob: - delta = np.random.uniform(-settings._hue_delta, settings._hue_delta) - img_hsv = np.array(img.convert('HSV')) - img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta - img = Image.fromarray(img_hsv, mode='HSV').convert('RGB') - return img - - -def distort_image(img, settings): - prob = np.random.uniform(0, 1) - # Apply different distort order - if prob > 0.5: - img = random_brightness(img, settings) - img = random_contrast(img, settings) - img = random_saturation(img, settings) - img = random_hue(img, settings) - else: - img = random_brightness(img, settings) - img = random_saturation(img, settings) - img = random_hue(img, settings) - img = random_contrast(img, settings) - return img - - -def expand_image(img, bbox_labels, img_width, img_height, settings): - prob = np.random.uniform(0, 1) - if prob < settings._expand_prob: - if settings._expand_max_ratio - 1 >= 0.01: - expand_ratio = np.random.uniform(1, settings._expand_max_ratio) - height = int(img_height * expand_ratio) - width = int(img_width * expand_ratio) - h_off = math.floor(np.random.uniform(0, height - img_height)) - w_off = math.floor(np.random.uniform(0, width - img_width)) - expand_bbox = bbox(-w_off / img_width, -h_off / img_height, - (width - w_off) / img_width, - (height - h_off) / img_height) - expand_img = np.ones((height, width, 3)) - expand_img = np.uint8(expand_img * np.squeeze(settings._img_mean)) - expand_img = Image.fromarray(expand_img) - expand_img.paste(img, (int(w_off), int(h_off))) - bbox_labels = 
transform_labels(bbox_labels, expand_bbox) - return expand_img, bbox_labels, width, height - return img, bbox_labels, img_width, img_height diff --git a/PaddleCV/Research/astar2019/reader.py b/PaddleCV/Research/astar2019/reader.py deleted file mode 100644 index 39efa44aa0ea53b3f6a111d315add7e23dc21519..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/astar2019/reader.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import xml.etree.ElementTree -import os -import time -import copy -import six -import math -import numpy as np -from PIL import Image -from PIL import ImageDraw -import image_util -import paddle - - -class Settings(object): - def __init__(self, - dataset=None, - data_dir=None, - label_file=None, - resize_h=300, - resize_w=300, - mean_value=[127.5, 127.5, 127.5], - apply_distort=True, - apply_expand=True, - ap_version='11point'): - self._dataset = dataset - self._ap_version = ap_version - self._data_dir = data_dir - if 'pascalvoc' in dataset: - self._label_list = [] - label_fpath = os.path.join(data_dir, label_file) - for line in open(label_fpath): - self._label_list.append(line.strip()) - - self._apply_distort = apply_distort - self._apply_expand = apply_expand - self._resize_height = resize_h - self._resize_width = resize_w - self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype( - 'float32') - self._expand_prob = 0.5 - self._expand_max_ratio = 4 - self._hue_prob = 0.5 - self._hue_delta = 18 - self._contrast_prob = 0.5 - self._contrast_delta = 0.5 - self._saturation_prob = 0.5 - self._saturation_delta = 0.5 - self._brightness_prob = 0.5 - self._brightness_delta = 0.125 - - @property - def dataset(self): - return self._dataset - - @property - def ap_version(self): - return self._ap_version - - @property - def apply_expand(self): - return self._apply_expand - - @property - def apply_distort(self): - return self._apply_distort - - @property - def data_dir(self): - return self._data_dir - - @data_dir.setter - def data_dir(self, data_dir): - self._data_dir = data_dir - - @property - def label_list(self): - return self._label_list - - @property - def resize_h(self): - return self._resize_height - - @property - def resize_w(self): - return self._resize_width - - @property - def img_mean(self): - return self._img_mean - - -def preprocess(img, bbox_labels, mode, settings): - img_width, img_height = img.size - sampled_labels = bbox_labels - if mode == 
'train': - if settings._apply_distort: - img = image_util.distort_image(img, settings) - if settings._apply_expand: - img, bbox_labels, img_width, img_height = image_util.expand_image( - img, bbox_labels, img_width, img_height, settings) - # sampling - batch_sampler = [] - # hard-code here - batch_sampler.append( - image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0)) - sampled_bbox = image_util.generate_batch_samples(batch_sampler, - bbox_labels) - - img = np.array(img) - if len(sampled_bbox) > 0: - idx = int(np.random.uniform(0, len(sampled_bbox))) - img, sampled_labels = image_util.crop_image( - img, bbox_labels, sampled_bbox[idx], img_width, img_height) - - img = Image.fromarray(img) - img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS) - img = np.array(img) - - if mode == 'train': - mirror = int(np.random.uniform(0, 2)) - if mirror == 1: - img = img[:, ::-1, :] - for i in six.moves.xrange(len(sampled_labels)): - tmp = sampled_labels[i][1] - sampled_labels[i][1] = 1 - sampled_labels[i][3] - sampled_labels[i][3] = 1 - tmp - # HWC to CHW - if len(img.shape) == 3: - img = np.swapaxes(img, 1, 2) - img = np.swapaxes(img, 1, 0) - # RBG to BGR - img = img[[2, 1, 0], :, :] - img = img.astype('float32') - img -= settings.img_mean - img = img * 0.007843 - return img, sampled_labels - - -def coco(settings, coco_api, file_list, mode, batch_size, shuffle, data_dir): - from pycocotools.coco import COCO - - def reader(): - if mode == 'train' and shuffle: - 
np.random.shuffle(file_list) - batch_out = [] - for image in file_list: - image_name = image['file_name'] - image_path = os.path.join(data_dir, image_name) - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." % image_path) - im = Image.open(image_path) - if im.mode == 'L': - im = im.convert('RGB') - im_width, im_height = im.size - im_id = image['id'] - - # layout: category_id | xmin | ymin | xmax | ymax | iscrowd - bbox_labels = [] - annIds = coco_api.getAnnIds(imgIds=image['id']) - anns = coco_api.loadAnns(annIds) - for ann in anns: - bbox_sample = [] - # start from 1, leave 0 to background - bbox_sample.append(float(ann['category_id'])) - bbox = ann['bbox'] - xmin, ymin, w, h = bbox - xmax = xmin + w - ymax = ymin + h - bbox_sample.append(float(xmin) / im_width) - bbox_sample.append(float(ymin) / im_height) - bbox_sample.append(float(xmax) / im_width) - bbox_sample.append(float(ymax) / im_height) - bbox_sample.append(float(ann['iscrowd'])) - bbox_labels.append(bbox_sample) - im, sample_labels = preprocess(im, bbox_labels, mode, settings) - sample_labels = np.array(sample_labels) - if len(sample_labels) == 0: continue - im = im.astype('float32') - boxes = sample_labels[:, 1:5] - lbls = sample_labels[:, 0].astype('int32') - iscrowd = sample_labels[:, -1].astype('int32') - if 'cocoMAP' in settings.ap_version: - batch_out.append((im, boxes, lbls, iscrowd, - [im_id, im_width, im_height])) - else: - batch_out.append((im, boxes, lbls, iscrowd)) - - if len(batch_out) == batch_size: - yield batch_out - batch_out = [] - - if mode == 'test' and len(batch_out) > 1: - yield batch_out - batch_out = [] - - return reader - - -def pascalvoc(settings, file_list, mode, batch_size, shuffle): - def reader(): - if mode == 'train' and shuffle: - np.random.shuffle(file_list) - batch_out = [] - cnt = 0 - for image in file_list: - image_path, label_path = image.split() - image_path = os.path.join(settings.data_dir, 
image_path) - label_path = os.path.join(settings.data_dir, label_path) - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." % image_path) - im = Image.open(image_path) - if im.mode == 'L': - im = im.convert('RGB') - im_width, im_height = im.size - - # layout: label | xmin | ymin | xmax | ymax | difficult - bbox_labels = [] - root = xml.etree.ElementTree.parse(label_path).getroot() - for object in root.findall('object'): - bbox_sample = [] - # start from 1 - bbox_sample.append( - float(settings.label_list.index(object.find('name').text))) - bbox = object.find('bndbox') - difficult = float(object.find('difficult').text) - bbox_sample.append(float(bbox.find('xmin').text) / im_width) - bbox_sample.append(float(bbox.find('ymin').text) / im_height) - bbox_sample.append(float(bbox.find('xmax').text) / im_width) - bbox_sample.append(float(bbox.find('ymax').text) / im_height) - bbox_sample.append(difficult) - bbox_labels.append(bbox_sample) - im, sample_labels = preprocess(im, bbox_labels, mode, settings) - sample_labels = np.array(sample_labels) - if len(sample_labels) == 0: continue - im = im.astype('float32') - boxes = sample_labels[:, 1:5] - lbls = sample_labels[:, 0].astype('int32') - difficults = sample_labels[:, -1].astype('int32') - - batch_out.append((im, boxes, lbls, difficults)) - if len(batch_out) == batch_size: - yield batch_out - cnt += len(batch_out) - batch_out = [] - - if mode == 'test' and len(batch_out) > 1: - yield batch_out - cnt += len(batch_out) - batch_out = [] - - return reader - - -def train(settings, - file_list, - batch_size, - shuffle=True, - num_workers=8, - enable_ce=False): - file_path = os.path.join(settings.data_dir, file_list) - readers = [] - if 'coco' in settings.dataset: - # cocoapi - from pycocotools.coco import COCO - coco_api = COCO(file_path) - image_ids = coco_api.getImgIds() - images = coco_api.loadImgs(image_ids) - np.random.shuffle(images) - n = 
int(math.ceil(len(images) // num_workers)) - image_lists = [images[i:i + n] for i in range(0, len(images), n)] - - if '2014' in file_list: - sub_dir = "train2014" - elif '2017' in file_list: - sub_dir = "train2017" - data_dir = os.path.join(settings.data_dir, sub_dir) - for l in image_lists: - readers.append( - coco(settings, coco_api, l, 'train', batch_size, shuffle, - data_dir)) - else: - images = [line.strip() for line in open(file_path)] - np.random.shuffle(images) - n = int(math.ceil(len(images) // num_workers)) - image_lists = [images[i:i + n] for i in range(0, len(images), n)] - for l in image_lists: - readers.append(pascalvoc(settings, l, 'train', batch_size, shuffle)) - return paddle.reader.multiprocess_reader(readers, False) - - -def test(settings, file_list, batch_size): - file_list = os.path.join(settings.data_dir, file_list) - if 'coco' in settings.dataset: - from pycocotools.coco import COCO - coco_api = COCO(file_list) - image_ids = coco_api.getImgIds() - images = coco_api.loadImgs(image_ids) - if '2014' in file_list: - sub_dir = "val2014" - elif '2017' in file_list: - sub_dir = "val2017" - data_dir = os.path.join(settings.data_dir, sub_dir) - return coco(settings, coco_api, images, 'test', batch_size, False, - data_dir) - else: - image_list = [line.strip() for line in open(file_list)] - return pascalvoc(settings, image_list, 'test', batch_size, False) - - -def infer(settings, image_path): - def reader(): - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." 
% image_path) - img = Image.open(image_path) - if img.mode == 'L': - img = img.convert('RGB') - im_width, im_height = img.size - img = img.resize((settings.resize_w, settings.resize_h), - Image.ANTIALIAS) - img = np.array(img) - # HWC to CHW - if len(img.shape) == 3: - img = np.swapaxes(img, 1, 2) - img = np.swapaxes(img, 1, 0) - # RBG to BGR - img = img[[2, 1, 0], :, :] - img = img.astype('float32') - img -= settings.img_mean - img = img * 0.007843 - return img - - return reader diff --git a/PaddleCV/Research/astar2019/score.py b/PaddleCV/Research/astar2019/score.py deleted file mode 100644 index 808f173859824e2cbcc1d40600e04148ff4d09a5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/astar2019/score.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -# os.environ["CUDA_VISIBLE_DEVICES"] = "0" -# os.environ["FLAGS_fraction_of_gpu_memory_to_use"] = "0.3" -import sys -sys.path.insert(0, ".") -import argparse -import functools - -import paddle.fluid as fluid -import reader -from utils import * -import json -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -import tempfile - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('data_dir', str, '', "The data root path.") -add_arg('test_list', str, '', "The testing data lists.") -add_arg('model_dir', str, '', "The model path.") -add_arg('nms_threshold', float, 0.45, "NMS threshold.") -add_arg('ap_version', str, 'cocoMAP', "cocoMAP.") -add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will be subtracted.") #123.68 -add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 -add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 - -def use_coco_api_compute_mAP(data_args, test_list, num_classes, test_reader, exe, infer_program, - feeded_var_names, feeder, target_var, batch_size): - cocoGt = COCO(os.path.join(data_args.data_dir, test_list)) - json_category_id_to_contiguous_id = { - v: i + 1 - for i, v in enumerate(cocoGt.getCatIds()) - } - contiguous_category_id_to_json_id = { - v: k - for k, v in json_category_id_to_contiguous_id.items() - } - - dts_res = [] - - executor = fluid.Executor(fluid.CUDAPlace(0)) - test_program = fluid.Program() - with fluid.program_guard(test_program): - boxes = fluid.layers.data( - name='boxes', shape=[-1, -1, 4], dtype='float32') - scores = fluid.layers.data( - name='scores', shape=[-1, num_classes, -1], dtype='float32') - pred_result = fluid.layers.multiclass_nms( - bboxes=boxes, - scores=scores, - score_threshold=0.01, - nms_top_k=-1, - nms_threshold=0.45, 
- keep_top_k=-1, - normalized=False) - - executor.run(fluid.default_startup_program()) - - for batch_id, data in enumerate(test_reader()): - boxes_np, scores_np = exe.run(program=infer_program, - feed={feeded_var_names[0]: feeder.feed(data)['image']}, - fetch_list=target_var) - - nms_out = executor.run( - program=test_program, - feed={ - 'boxes': boxes_np, - 'scores': scores_np - }, - fetch_list=[pred_result], return_numpy=False) - if batch_id % 20 == 0: - print("Batch {0}".format(batch_id)) - dts_res += get_batch_dt_res(nms_out, data, contiguous_category_id_to_json_id, batch_size) - - _, tmp_file = tempfile.mkstemp() - with open(tmp_file, 'w') as outfile: - json.dump(dts_res, outfile) - print("start evaluate using coco api") - cocoDt = cocoGt.loadRes(tmp_file) - cocoEval = COCOeval(cocoGt, cocoDt, "bbox") - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - mAP = cocoEval.stats[0] - return mAP - -def compute_score(model_dir, data_dir, test_list='annotations/instances_val2017.json', batch_size=32, height=300, width=300, num_classes=81, - mean_value=[127.5, 127.5, 127.5]): - """ - compute score, mAP, flops of a model - - Args: - model_dir (string): directory of model - data_dir (string): directory of coco dataset, like '/your/path/to/coco', '/work/datasets/coco' - - Returns: - tuple: score, mAP, flops. 
- - """ - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - [infer_program, feeded_var_names, target_var] = fluid.io.load_inference_model(dirname=model_dir, executor=exe) - - image_shape = [3, height, width] - - data_args = reader.Settings( - dataset='coco2017', - data_dir=data_dir, - resize_h=height, - resize_w=width, - mean_value=mean_value, - apply_distort=False, - apply_expand=False, - ap_version='cocoMAP') - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - gt_box = fluid.layers.data( - name='gt_box', shape=[4], dtype='float32', lod_level=1) - gt_label = fluid.layers.data( - name='gt_label', shape=[1], dtype='int32', lod_level=1) - gt_iscrowd = fluid.layers.data( - name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1) - gt_image_info = fluid.layers.data( - name='gt_image_id', shape=[3], dtype='int32') - - test_reader = reader.test(data_args, test_list, batch_size) - feeder = fluid.DataFeeder( - place=place, - feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info]) - - mAP = use_coco_api_compute_mAP(data_args, test_list, num_classes, test_reader, exe, infer_program, - feeded_var_names, feeder, target_var, batch_size) - total_flops_params, is_quantize = summary(infer_program) - MAdds = np.sum(total_flops_params['flops']) / 2000000.0 - - if is_quantize: - MAdds /= 2.0 - - print('mAP:', mAP) - print('MAdds:', MAdds) - - if MAdds < 160.0: - MAdds = 160.0 - - if MAdds > 1300.0: - score = 0.0 - else: - score = mAP * 100 - (5.1249 * np.log(MAdds) - 14.499) - - print('score:', score) - - return score, mAP, MAdds - - -if __name__ == '__main__': - args = parser.parse_args() - print_arguments(args) - score, mAP, flops = compute_score(args.model_dir, args.data_dir, batch_size=args.batch_size) diff --git a/PaddleCV/Research/astar2019/utils.py b/PaddleCV/Research/astar2019/utils.py deleted file mode 100644 index 
0ed49b81e6482c21fd88fc0988fa5b90d112417d..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/astar2019/utils.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Contains common utility functions.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict -from prettytable import PrettyTable -import distutils.util -import numpy as np -import six - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -def summary(main_prog): - ''' - It can summary model's PARAMS, FLOPs until now. - It support common operator like conv, fc, pool, relu, sigmoid, bn etc. - Args: - main_prog: main program - Returns: - print summary on terminal - ''' - collected_ops_list = [] - is_quantize = False - for one_b in main_prog.blocks: - block_vars = one_b.vars - for one_op in one_b.ops: - if str(one_op.type).find('quantize') > -1: - is_quantize = True - op_info = OrderedDict() - spf_res = _summary_model(block_vars, one_op) - if spf_res is None: - continue - # TODO: get the operator name - op_info['type'] = one_op.type - op_info['input_shape'] = spf_res[0][1:] - op_info['out_shape'] = spf_res[1][1:] - op_info['PARAMs'] = spf_res[2] - op_info['FLOPs'] = spf_res[3] - collected_ops_list.append(op_info) - - - summary_table, total = _format_summary(collected_ops_list) - _print_summary(summary_table, total) - return total, is_quantize - - -def _summary_model(block_vars, one_op): - ''' - Compute operator's params and flops. - Args: - block_vars: all vars of one block - one_op: one operator to count - Returns: - in_data_shape: one operator's input data shape - out_data_shape: one operator's output data shape - params: one operator's PARAMs - flops: : one operator's FLOPs - ''' - if one_op.type in ['conv2d', 'depthwise_conv2d']: - k_arg_shape = block_vars[one_op.input("Filter")[0]].shape - in_data_shape = block_vars[one_op.input("Input")[0]].shape - out_data_shape = block_vars[one_op.output("Output")[0]].shape - c_out, c_in, k_h, k_w = k_arg_shape - _, c_out_, h_out, w_out = out_data_shape - #assert c_out == c_out_, 'shape error!' 
- k_groups = one_op.attr("groups") - kernel_ops = k_h * k_w * (c_in / k_groups) - bias_ops = 0 if one_op.input("Bias") == [] else 1 - params = c_out * (kernel_ops + bias_ops) - flops = h_out * w_out * c_out * (kernel_ops + bias_ops) - # base nvidia paper, include mul and add - flops = 2 * flops - - # var_name = block_vars[one_op.input("Filter")[0]].name - # if var_name.endswith('.int8'): - # flops /= 2.0 - - elif one_op.type == 'pool2d': - in_data_shape = block_vars[one_op.input("X")[0]].shape - out_data_shape = block_vars[one_op.output("Out")[0]].shape - _, c_out, h_out, w_out = out_data_shape - k_size = one_op.attr("ksize") - params = 0 - flops = h_out * w_out * c_out * (k_size[0] * k_size[1]) - - elif one_op.type == 'mul': - k_arg_shape = block_vars[one_op.input("Y")[0]].shape - in_data_shape = block_vars[one_op.input("X")[0]].shape - out_data_shape = block_vars[one_op.output("Out")[0]].shape - # TODO: fc has mul ops - # add attr to mul op, tell us whether it belongs to 'fc' - # this's not the best way - if 'fc' not in one_op.output("Out")[0]: - return None - k_in, k_out = k_arg_shape - # bias in sum op - params = k_in * k_out + 1 - flops = k_in * k_out - - # var_name = block_vars[one_op.input("Y")[0]].name - # if var_name.endswith('.int8'): - # flops /= 2.0 - - elif one_op.type in ['sigmoid', 'tanh', 'relu', 'leaky_relu', 'prelu']: - in_data_shape = block_vars[one_op.input("X")[0]].shape - out_data_shape = block_vars[one_op.output("Out")[0]].shape - params = 0 - if one_op.type == 'prelu': - params = 1 - flops = 1 - for one_dim in in_data_shape[1:]: - flops *= one_dim - - elif one_op.type == 'batch_norm': - in_data_shape = block_vars[one_op.input("X")[0]].shape - out_data_shape = block_vars[one_op.output("Y")[0]].shape - _, c_in, h_out, w_out = in_data_shape - # gamma, beta - params = c_in * 2 - # compute mean and std - flops = h_out * w_out * c_in * 2 - - else: - return None - - return in_data_shape, out_data_shape, params, flops - - -def 
_format_summary(collected_ops_list): - ''' - Format summary report. - Args: - collected_ops_list: the collected operator with summary - Returns: - summary_table: summary report format - total: sum param and flops - ''' - summary_table = PrettyTable( - ["No.", "TYPE", "INPUT", "OUTPUT", "PARAMs", "FLOPs"]) - summary_table.align = 'r' - - total = {} - total_params = [] - total_flops = [] - for i, one_op in enumerate(collected_ops_list): - # notice the order - table_row = [ - i, - one_op['type'], - one_op['input_shape'], - one_op['out_shape'], - int(one_op['PARAMs']), - int(one_op['FLOPs']), - ] - summary_table.add_row(table_row) - total_params.append(int(one_op['PARAMs'])) - total_flops.append(int(one_op['FLOPs'])) - - total['params'] = total_params - total['flops'] = total_flops - - return summary_table, total - - -def _print_summary(summary_table, total): - ''' - Print all the summary on terminal. - Args: - summary_table: summary report format - total: sum param and flops - ''' - parmas = total['params'] - flops = total['flops'] - print(summary_table) - print('Total PARAMs: {}({:.4f}M)'.format( - sum(parmas), sum(parmas) / (10 ** 6))) - print('Total FLOPs: {}({:.2f}G)'.format(sum(flops), sum(flops) / 10 ** 9)) - print( - "Notice: \n now supported ops include [Conv, DepthwiseConv, FC(mul), BatchNorm, Pool, Activation(sigmoid, tanh, relu, leaky_relu, prelu)]" - ) - - -def get_batch_dt_res(nmsed_out_v, data, contiguous_category_id_to_json_id, batch_size): - dts_res = [] - lod = nmsed_out_v[0].lod()[0] - nmsed_out_v = np.array(nmsed_out_v[0]) - real_batch_size = min(batch_size, len(data)) - assert (len(lod) == real_batch_size + 1), \ - "Error Lod Tensor offset dimension. Lod({}) vs. 
batch_size({})".format(len(lod), batch_size) - k = 0 - for i in range(real_batch_size): - dt_num_this_img = lod[i + 1] - lod[i] - image_id = int(data[i][4][0]) - image_width = int(data[i][4][1]) - image_height = int(data[i][4][2]) - for j in range(dt_num_this_img): - dt = nmsed_out_v[k] - k = k + 1 - category_id, score, xmin, ymin, xmax, ymax = dt.tolist() - xmin = max(min(xmin, 1.0), 0.0) * image_width - ymin = max(min(ymin, 1.0), 0.0) * image_height - xmax = max(min(xmax, 1.0), 0.0) * image_width - ymax = max(min(ymax, 1.0), 0.0) * image_height - w = xmax - xmin - h = ymax - ymin - bbox = [xmin, ymin, w, h] - dt_res = { - 'image_id': image_id, - 'category_id': contiguous_category_id_to_json_id[category_id], - 'bbox': bbox, - 'score': score - } - dts_res.append(dt_res) - return dts_res diff --git a/PaddleCV/Research/danet/README.md b/PaddleCV/Research/danet/README.md deleted file mode 100644 index 02348ae2d2c06a548ac6ad9987d398eb83dce49d..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# [Dual Attention Network for Scene Segmentation (CVPR2019)](https://arxiv.org/pdf/1809.02983.pdf) - -本项目是[DANet](https://arxiv.org/pdf/1809.02983.pdf)的 PaddlePaddle(>=1.5.2) 实现, 包含模型训练,验证等内容。 - -## 模型简介 -![net](img/Network.png) -骨干网络使用ResNet,为更好地进行语义分割任务,作者对ResNet做出以下改动: - - 1、将最后两个layer的downsampling取消,使得特征图是原图的1/8,保持较高空间分辨率。 - 2、最后两个layer采用空洞卷积扩大感受野。 -然后接上两个并行的注意力模块(位置注意力和通道注意力),最终将两个模块的结果进行elementwise操作,之后再接一层卷积输出分割图。 - -### 位置注意力 - -![position](img/position.png) - -A是骨干网络ResNet输出经过一层卷积生成的特征图,维度为CHW; -A经过3个卷积操作输出维度均为CHW的B、C、D。将B、C、D都reshape到CN(N = H*W); -然后将B reshape后的结果转置与C相乘,得到N * N的矩阵, 对于矩阵的每一个点进行softmax; -然后将D与softmax后的结果相乘并reshape到CHW,再与A进行elementwise。 - -### 通道注意力 -![channel](img/channel.png) - - -A是骨干网络ResNet输出经过一层卷积生成的特征图,维度为CHW; -A经过3个reshape操作输出维度均为CN(N = H*W)的B、C、D; -然后将B转置与C相乘,得到C * C的矩阵,对于矩阵的每一个点进行softmax; -然后将D与softmax后的结果相乘并reshape到CHW,再与A进行elementwise。 - - - -## 数据准备 - -公开数据集:Cityscapes - 
-训练集2975张,验证集500张,测试集1525张,图片分辨率都是1024*2048。 - -数据集来源:AIstudio数据集页面上[下载](https://aistudio.baidu.com/aistudio/datasetDetail/11503), cityscapes.zip解压至dataset文件夹下,train.zip解压缩到cityscapes/leftImg8bit,其目录结构如下: -```text -dataset - ├── cityscapes # Cityscapes数据集 - ├── gtFine # 精细化标注的label - ├── leftImg8bit # 训练,验证,测试图片 - ├── trainLabels.txt # 训练图片路径 - ├── valLabels.txt # 验证图片路径 - ... ... -``` -## 训练说明 - -#### 数据增强策略 - 1、随机尺度缩放:尺度范围0.75到2.0 - 2、随机左右翻转:发生概率0.5 - 3、同比例缩放:缩放的大小由选项1决定。 - 4、随机裁剪: - 5、高斯模糊:发生概率0.3(可选) - 6、颜色抖动,对比度,锐度,亮度; 发生概率0.3(可选) -###### 默认1、2、3、4、5、6都开启 - -#### 学习率调节策略 - 1、使用热身策略,学习率由0递增到base_lr,热身轮数(epoch)是5 - 2、在热身策略之后使用学习率衰减策略(poly),学习率由base_lr递减到0 - -#### 优化器选择 - Momentum: 动量0.9,正则化系数1e-4 - -#### 加载预训练模型 - 设置 --load_pretrained_model(默认为False) - 预训练文件: - checkpoint/DANet50_pretrained_model_paddle1.6.pdparams - checkpoint/DANet101_pretrained_model_paddle1.6.pdparams - -#### 加载训练好的模型 - 设置 --load_better_model(默认为False) - 训练好的文件: - checkpoint/DANet101_better_model_paddle1.6.pdparams -##### 【注】 - 训练时paddle版本是1.5.2,代码已转为1.6版本(兼容1.6版本),预训练参数、训练好的参数来自1.5.2版本 - -#### 配置模型文件路径 -[预训练参数、最优模型参数下载](https://paddlemodels.bj.bcebos.com/DANet/DANet_models.tar) - -其目录结构如下: -```text -checkpoint - ├── DANet50_pretrained_model_paddle1.6.pdparams # DANet50预训练模型,需要paddle >=1.6.0 - ├── DANet101_pretrained_model_paddle1.6.pdparams # DANet101预训练模型,需要paddle >=1.6.0 - ├── DANet101_better_model_paddle1.6.pdparams # DANet101训练最优模型,需要paddle >=1.6.0 - ├── DANet101_better_model_paddle1.5.2 # DANet101在1.5.2版本训练的最优模型,需要paddle >= 1.5.2 - -``` - -## 模型训练 - -```sh -cd danet -export PYTHONPATH=`pwd`:$PYTHONPATH -# open garbage collection to save memory -export FLAGS_eager_delete_tensor_gb=0.0 -# setting visible devices for train -export CUDA_VISIBLE_DEVICES=0,1,2,3 -``` - -executor执行以下命令进行训练 -```sh -python train_executor.py --backbone resnet101 --base_size 1024 --crop_size 768 --epoch_num 350 --batch_size 2 --lr 0.003 --lr_scheduler poly --warm_up --warmup_epoch 2 --cuda --use_data_parallel 
--load_pretrained_model --save_model checkpoint/DANet101_better_model_paddle1.5.2 --multi_scales --flip --dilated --multi_grid --scale --multi_dilation 4 8 16 -``` -参数含义: 使用ResNet101骨干网络,训练图片基础大小是1024,裁剪大小是768,训练轮数是350次,batch size是2 -学习率是0.003,学习率衰减策略是poly,使用学习率热身,热身轮数是2轮,使用GPU,使用数据并行, 加载预训练模型,设置加载的模型地址,使用多尺度测试, 使用图片左右翻转测试,使用空洞卷积,使用multi_grid,multi_dilation设置为4 8 16,使用多尺度训练 -##### Windows下训练需要去掉 --use_data_parallel -#### 或者 -dygraph执行以下命令进行训练 -```sh -python train_dygraph.py --backbone resnet101 --base_size 1024 --crop_size 768 --epoch_num 350 --batch_size 2 --lr 0.003 --lr_scheduler poly --cuda --use_data_parallel --load_pretrained_model --save_model checkpoint/DANet101_better_model_paddle1.6 --multi_scales --flip --dilated --multi_grid --scale --multi_dilation 4 8 16 -``` -参数含义: 使用ResNet101骨干网络,训练图片基础大小是1024,裁剪大小是768,训练轮数是350次,batch size是2,学习率是0.003,学习率衰减策略是poly,使用GPU, 使用数据并行,加载预训练模型,设置加载的模型地址,使用多尺度测试,使用图片左右翻转测试,使用空洞卷积,使用multi_grid,multi_dilation设置4 8 16,使用多尺度训练 - -#### 【注】 -##### train_executor.py使用executor方式训练(适合paddle >= 1.5.2),train_dygraph.py使用动态图方式训练(适合paddle >= 1.6.0),两种方式都可以 -##### 动态图方式训练暂时不支持学习率热身 - -#### 在训练阶段,输出的验证结果不是真实的,需要使用eval.py来获得验证的最终结果。 - - ## 模型验证 -```sh -# open garbage collection to save memory -export FLAGS_eager_delete_tensor_gb=0.0 -# setting visible devices for prediction -export CUDA_VISIBLE_DEVICES=0 - -python eval.py --backbone resnet101 --base_size 2048 --crop_size 1024 --cuda --use_data_parallel --load_better_model --save_model checkpoint/DANet101_better_model_paddle1.6 --multi_scales --flip --dilated --multi_grid --multi_dilation 4 8 16 -``` -##### 如果需要把executor训练的参数转成dygraph模式下进行验证的话,请在命令行加上--change_executor_to_dygraph - -## 验证结果 -评测指标:mean IOU(平均交并比) - - -| 模型 | 单尺度 | 多尺度 | -| :---:|:---:| :---:| -|DANet101|0.8043836|0.8138021 - -##### 具体数值 -| 模型 | cls1 | cls2 | cls3 | cls4 | cls5 | cls6 | cls7 | cls8 | cls9 | cls10 | cls11 | cls12 | cls13 | cls14 | cls15 | cls16 |cls17 | cls18 | cls19 | -| :---:|:---: | :---:| :---:|:---: | 
:---:| :---:|:---: | :---:| :---:|:---: |:---: |:---: |:---: | :---: | :---: |:---: | :---:| :---: |:---: | -|DANet101-SS|0.98212|0.85372|0.92799|0.59976|0.63318|0.65819|0.72023|0.80000|0.92605|0.65788|0.94841|0.83377|0.65206|0.95566|0.87148|0.91233|0.84352|0.71948|0.78737| -|DANet101-MS|0.98047|0.84637|0.93084|0.62699|0.64839|0.67769|0.73650|0.81343|0.92942|0.67010|0.95127|0.84466|0.66635|0.95749|0.87755|0.92370|0.85344|0.73007|0.79742| - -## 输出结果可视化 -![val_1](img/val_1.png) -###### 输入图片 -![val_gt](img/val_gt.png) -###### 图片label -![val_output](img/val_output.png) -###### DANet101模型输出 diff --git a/PaddleCV/Research/danet/checkpoint/.gitkeep b/PaddleCV/Research/danet/checkpoint/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/Research/danet/danet.py b/PaddleCV/Research/danet/danet.py deleted file mode 100644 index 566a13e5cb7c9079de704db86647bcf2a5cabf1b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/danet.py +++ /dev/null @@ -1,641 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import shutil -import paddle.fluid as fluid -import os - - -__all__ = ['DANet'] - - -class ConvBN(fluid.dygraph.Layer): - - def __init__(self, - name_scope, - num_filters, - filter_size=3, - stride=1, - dilation=1, - act=None, - learning_rate=1.0, - dtype='float32', - bias_attr=False): - super(ConvBN, self).__init__(name_scope) - - if dilation != 1: - padding = dilation - else: - padding = (filter_size - 1) // 2 - - self._conv = fluid.dygraph.Conv2D(name_scope, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=None, - dtype=dtype, - bias_attr=bias_attr if bias_attr is False else fluid.ParamAttr( - learning_rate=learning_rate, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=learning_rate, - name='weight') - ) - self._bn = fluid.dygraph.BatchNorm(name_scope, - num_channels=num_filters, - act=act, - dtype=dtype, - momentum=0.9, - epsilon=1e-5, - bias_attr=fluid.ParamAttr( - learning_rate=learning_rate, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=learning_rate, - name='weight'), - moving_mean_name='running_mean', - moving_variance_name='running_var' - ) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._bn(x) - return x - - -class BasicBlock(fluid.dygraph.Layer): - - def __init__(self, - name_scope, - num_filters, - stride=1, - dilation=1, - same=False): - super(BasicBlock, self).__init__(name_scope) - self._conv0 = ConvBN(self.full_name(), - num_filters=num_filters, - filter_size=3, - stride=stride, - dilation=dilation, - act='relu') - self._conv1 = ConvBN(self.full_name(), - num_filters=num_filters, - filter_size=3, - stride=1, - dilation=dilation, - act=None) - - self.same = same - - if not same: - self._skip = ConvBN(self.full_name(), - num_filters=num_filters, - filter_size=1, - stride=stride, - act=None) - - def forward(self, inputs): - 
x = self._conv0(inputs) - x = self._conv1(x) - if self.same: - skip = inputs - else: - skip = self._skip(inputs) - x = fluid.layers.elementwise_add(x, skip, act='relu') - return x - - -class BottleneckBlock(fluid.dygraph.Layer): - def __init__(self, name_scope, num_filters, stride, dilation=1, same=False): - super(BottleneckBlock, self).__init__(name_scope) - self.expansion = 4 - - self._conv0 = ConvBN(name_scope, - num_filters=num_filters, - filter_size=1, - stride=1, - act='relu') - self._conv1 = ConvBN(name_scope, - num_filters=num_filters, - filter_size=3, - stride=stride, - dilation=dilation, - act='relu') - self._conv2 = ConvBN(name_scope, - num_filters=num_filters * self.expansion, - filter_size=1, - stride=1, - act=None) - self.same = same - - if not same: - self._skip = ConvBN(name_scope, - num_filters=num_filters * self.expansion, - filter_size=1, - stride=stride, - act=None) - - def forward(self, inputs): - x = self._conv0(inputs) - x = self._conv1(x) - x = self._conv2(x) - if self.same: - skip = inputs - else: - skip = self._skip(inputs) - x = fluid.layers.elementwise_add(x, skip, act='relu') - return x - - -class ResNet(fluid.dygraph.Layer): - def __init__(self, - name_scope, - layer=152, - num_class=1000, - dilated=True, - multi_grid=True, - multi_dilation=[4, 8, 16], - need_fc=False): - super(ResNet, self).__init__(name_scope) - - support_layer = [18, 34, 50, 101, 152] - assert layer in support_layer, 'layer({}) not in {}'.format(layer, support_layer) - self.need_fc = need_fc - self.num_filters_list = [64, 128, 256, 512] - if layer == 18: - self.depth = [2, 2, 2, 2] - elif layer == 34: - self.depth = [3, 4, 6, 3] - elif layer == 50: - self.depth = [3, 4, 6, 3] - elif layer == 101: - self.depth = [3, 4, 23, 3] - elif layer == 152: - self.depth = [3, 8, 36, 3] - - if multi_grid: - assert multi_dilation is not None - self.multi_dilation = multi_dilation - - self._conv = ConvBN(name_scope, 64, 7, 2, act='relu') - self._pool = 
fluid.dygraph.Pool2D(name_scope, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layer >= 50: - self.layer1 = self._make_layer(block=BottleneckBlock, - depth=self.depth[0], - num_filters=self.num_filters_list[0], - stride=1, - same=False, - name='layer1') - self.layer2 = self._make_layer(block=BottleneckBlock, - depth=self.depth[1], - num_filters=self.num_filters_list[1], - stride=2, - same=False, - name='layer2') - if dilated: - self.layer3 = self._make_layer(block=BottleneckBlock, - depth=self.depth[2], - num_filters=self.num_filters_list[2], - stride=2, - dilation=2, - same=False, - name='layer3') - if multi_grid: # layer4 采用不同的采样率 - self.layer4 = self._make_layer(block=BottleneckBlock, - depth=self.depth[3], - num_filters=self.num_filters_list[3], - stride=2, - dilation=4, - multi_grid=multi_grid, - multi_dilation=self.multi_dilation, - same=False, - name='layer4') - else: - self.layer4 = self._make_layer(block=BottleneckBlock, - depth=self.depth[3], - num_filters=self.num_filters_list[3], - stride=2, - dilation=4, - same=False, - name='layer4') - else: - self.layer3 = self._make_layer(block=BottleneckBlock, - depth=self.depth[2], - num_filters=self.num_filters_list[2], - stride=2, - dilation=1, - same=False, - name='layer3') - self.layer4 = self._make_layer(block=BottleneckBlock, - depth=self.depth[3], - num_filters=self.num_filters_list[3], - stride=2, - dilation=1, - same=False, - name='layer4') - - else: # layer=18 or layer=34 - self.layer1 = self._make_layer(block=BasicBlock, - depth=self.depth[0], - num_filters=self.num_filters_list[0], - stride=1, - same=True, - name=name_scope) - self.layer2 = self._make_layer(block=BasicBlock, - depth=self.depth[1], - num_filters=self.num_filters_list[1], - stride=2, - same=False, - name=name_scope) - self.layer3 = self._make_layer(block=BasicBlock, - depth=self.depth[2], - num_filters=self.num_filters_list[2], - stride=2, - dilation=1, - same=False, - name=name_scope) - self.layer4 = 
self._make_layer(block=BasicBlock, - depth=self.depth[3], - num_filters=self.num_filters_list[3], - stride=2, - dilation=1, - same=False, - name=name_scope) - - self._avgpool = fluid.dygraph.Pool2D(name_scope, - global_pooling=True, - pool_type='avg') - self.fc = fluid.dygraph.FC(name_scope, - size=num_class, - act='softmax') - - def _make_layer(self, block, depth, num_filters, stride=1, dilation=1, same=False, multi_grid=False, - multi_dilation=None, name=None): - layers = [] - if dilation != 1: - # stride(2x2) with a dilated convolution instead - stride = 1 - - if multi_grid: - assert len(multi_dilation) == 3 - for depth in range(depth): - temp = block(name + '.{}'.format(depth), - num_filters=num_filters, - stride=stride, - dilation=multi_dilation[depth], - same=same) - stride = 1 - same = True - layers.append(self.add_sublayer('_{}_{}'.format(name, depth + 1), temp)) - else: - for depth in range(depth): - temp = block(name + '.{}'.format(depth), - num_filters=num_filters, - stride=stride, - dilation=dilation if depth > 0 else 1, - same=same) - stride = 1 - same = True - layers.append(self.add_sublayer('_{}_{}'.format(name, depth + 1), temp)) - return layers - - def forward(self, inputs): - x = self._conv(inputs) - - x = self._pool(x) - for layer in self.layer1: - x = layer(x) - c1 = x - - for layer in self.layer2: - x = layer(x) - c2 = x - - for layer in self.layer3: - x = layer(x) - c3 = x - - for layer in self.layer4: - x = layer(x) - c4 = x - - if self.need_fc: - x = self._avgpool(x) - x = self.fc(x) - return x - else: - return c1, c2, c3, c4 - - -class CAM(fluid.dygraph.Layer): - def __init__(self, - name_scope, - in_channels=512, - default_value=0): - """ - channel_attention_module - """ - super(CAM, self).__init__(name_scope) - self.in_channels = in_channels - self.gamma = fluid.layers.create_parameter(shape=[1], - dtype='float32', - is_bias=True, - attr=fluid.ParamAttr( - learning_rate=10.0, - name='cam_gamma'), - 
default_initializer=fluid.initializer.ConstantInitializer( - value=default_value) - ) - - def forward(self, inputs): - batch_size, c, h, w = inputs.shape - out_b = fluid.layers.reshape(inputs, shape=[batch_size, self.in_channels, h * w]) - out_c = fluid.layers.reshape(inputs, shape=[batch_size, self.in_channels, h * w]) - out_c_t = fluid.layers.transpose(out_c, perm=[0, 2, 1]) - mul_bc = fluid.layers.matmul(out_b, out_c_t) - - mul_bc_max = fluid.layers.reduce_max(mul_bc, dim=-1, keep_dim=True) - mul_bc_max = fluid.layers.expand(mul_bc_max, expand_times=[1, 1, c]) - x = fluid.layers.elementwise_sub(mul_bc_max, mul_bc) - - attention = fluid.layers.softmax(x, use_cudnn=True, axis=-1) - - out_d = fluid.layers.reshape(inputs, shape=[batch_size, self.in_channels, h * w]) - attention_mul = fluid.layers.matmul(attention, out_d) - - attention_reshape = fluid.layers.reshape(attention_mul, shape=[batch_size, self.in_channels, h, w]) - gamma_attention = fluid.layers.elementwise_mul(attention_reshape, self.gamma) - out = fluid.layers.elementwise_add(gamma_attention, inputs) - return out - - -class PAM(fluid.dygraph.Layer): - def __init__(self, - name_scope, - in_channels=512, - default_value=0): - """ - position_attention_module - """ - super(PAM, self).__init__(name_scope) - - assert in_channels // 8, 'in_channel // 8 > 0 ' - self.channel_in = in_channels // 8 - self._convB = fluid.dygraph.Conv2D(name_scope, - num_filters=in_channels // 8, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - self._convC = fluid.dygraph.Conv2D(name_scope, - num_filters=in_channels // 8, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - self._convD = fluid.dygraph.Conv2D(name_scope, - num_filters=in_channels, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - 
name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - self.gamma = fluid.layers.create_parameter(shape=[1], - dtype='float32', - is_bias=True, - attr=fluid.ParamAttr( - learning_rate=10.0, - name='pam_gamma'), - default_initializer=fluid.initializer.ConstantInitializer( - value=default_value)) - - def forward(self, inputs): - batch_size, c, h, w = inputs.shape - out_b = self._convB(inputs) - out_b_reshape = fluid.layers.reshape(out_b, shape=[batch_size, self.channel_in, h * w]) - out_b_reshape_t = fluid.layers.transpose(out_b_reshape, perm=[0, 2, 1]) - out_c = self._convC(inputs) - out_c_reshape = fluid.layers.reshape(out_c, shape=[batch_size, self.channel_in, h * w]) - - mul_bc = fluid.layers.matmul(out_b_reshape_t, out_c_reshape) - soft_max_bc = fluid.layers.softmax(mul_bc, use_cudnn=True, axis=-1) - - out_d = self._convD(inputs) - out_d_reshape = fluid.layers.reshape(out_d, shape=[batch_size, self.channel_in * 8, h * w]) - attention = fluid.layers.matmul(out_d_reshape, fluid.layers.transpose(soft_max_bc, perm=[0, 2, 1])) - attention = fluid.layers.reshape(attention, shape=[batch_size, self.channel_in * 8, h, w]) - - gamma_attention = fluid.layers.elementwise_mul(attention, self.gamma) - out = fluid.layers.elementwise_add(gamma_attention, inputs) - return out - - -class DAHead(fluid.dygraph.Layer): - def __init__(self, - name_scope, - in_channels, - out_channels, - batch_size): - super(DAHead, self).__init__(name_scope) - self.in_channel = in_channels // 4 - self.batch_size = batch_size - self._conv_bn_relu0 = ConvBN(name_scope, - num_filters=self.in_channel, - filter_size=3, - stride=1, - act='relu', - learning_rate=10.0, - bias_attr=False) - - self._conv_bn_relu1 = ConvBN(name_scope, - num_filters=self.in_channel, - filter_size=3, - stride=1, - act='relu', - learning_rate=10.0, - bias_attr=False) - - self._pam = PAM('pam', in_channels=self.in_channel, default_value=0.0) - self._cam = CAM('cam', in_channels=self.in_channel, 
default_value=0.0) - - self._conv_bn_relu2 = ConvBN(name_scope, - num_filters=self.in_channel, - filter_size=3, - stride=1, - act='relu', - learning_rate=10.0, - bias_attr=False) - - self._conv_bn_relu3 = ConvBN(name_scope, - num_filters=self.in_channel, - filter_size=3, - stride=1, - act='relu', - learning_rate=10.0, - bias_attr=False) - self._pam_last_conv = fluid.dygraph.Conv2D(name_scope, - num_filters=out_channels, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - self._cam_last_conv = fluid.dygraph.Conv2D(name_scope, - num_filters=out_channels, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - self._last_conv = fluid.dygraph.Conv2D(name_scope, - num_filters=out_channels, - filter_size=1, - bias_attr=fluid.ParamAttr( - learning_rate=10.0, - name='bias'), - param_attr=fluid.ParamAttr( - learning_rate=10.0, - name='weight') - ) - - def forward(self, inputs): - out = [] - inputs_pam = self._conv_bn_relu0(inputs) - pam = self._pam(inputs_pam) - position = self._conv_bn_relu2(pam) - - batch_size, num_channels = position.shape[:2] - - # dropout2d - ones = fluid.layers.ones(shape=[self.batch_size, num_channels], dtype='float32') - dropout1d_P = fluid.layers.dropout(ones, 0.1, dropout_implementation='upscale_in_train') - out_position_drop2d = fluid.layers.elementwise_mul(position, dropout1d_P, axis=0) - dropout1d_P.stop_gradient = True - - inputs_cam = self._conv_bn_relu1(inputs) - cam = self._cam(inputs_cam) - channel = self._conv_bn_relu3(cam) - - # dropout2d - ones2 = fluid.layers.ones(shape=[self.batch_size, num_channels], dtype='float32') - dropout1d_C = fluid.layers.dropout(ones2, 0.1, dropout_implementation='upscale_in_train') - out_channel_drop2d = fluid.layers.elementwise_mul(channel, dropout1d_C, axis=0) - dropout1d_C.stop_gradient = True - 
position_out = self._pam_last_conv(out_position_drop2d) - channel_out = self._cam_last_conv(out_channel_drop2d) - - feat_sum = fluid.layers.elementwise_add(position, channel, axis=1) - feat_sum_batch_size, feat_sum_num_channels = feat_sum.shape[:2] - - # dropout2d - feat_sum_ones = fluid.layers.ones(shape=[self.batch_size, feat_sum_num_channels], dtype='float32') - dropout1d_sum = fluid.layers.dropout(feat_sum_ones, 0.1, dropout_implementation='upscale_in_train') - dropout2d_feat_sum = fluid.layers.elementwise_mul(feat_sum, dropout1d_sum, axis=0) - dropout1d_sum.stop_gradient = True - feat_sum_out = self._last_conv(dropout2d_feat_sum) - - out.append(feat_sum_out) - out.append(position_out) - out.append(channel_out) - return tuple(out) - - -class DANet(fluid.dygraph.Layer): - def __init__(self, - name_scope, - backbone='resnet50', - num_classes=19, - batch_size=1, - dilated=True, - multi_grid=True, - multi_dilation=[4, 8, 16]): - super(DANet, self).__init__(name_scope) - if backbone == 'resnet50': - print('backbone resnet50, dilated={}, multi_grid={}, ' - 'multi_dilation={}'.format(dilated, multi_grid, multi_dilation)) - self._backone = ResNet('resnet50', layer=50, dilated=dilated, - multi_grid=multi_grid, multi_dilation=multi_dilation) - elif backbone == 'resnet101': - print('backbone resnet101, dilated={}, multi_grid={}, ' - 'multi_dilation={}'.format(dilated, multi_grid, multi_dilation)) - self._backone = ResNet('resnet101', layer=101, dilated=dilated, - multi_grid=multi_grid, multi_dilation=multi_dilation) - elif backbone == 'resnet152': - print('backbone resnet152, dilated={}, multi_grid={}, ' - 'multi_dilation={}'.format(dilated, multi_grid, multi_dilation)) - self._backone = ResNet('resnet152', layer=152, dilated=dilated, - multi_grid=multi_grid, multi_dilation=multi_dilation) - else: - raise ValueError('unknown backbone: {}'.format(backbone)) - - self._head = DAHead('DA_head', in_channels=2048, out_channels=num_classes, batch_size=batch_size) - - def 
forward(self, inputs): - h, w = inputs.shape[2:] - _, _, c3, c4 = self._backone(inputs) - x1, x2, x3 = self._head(c4) - out = [] - out1 = fluid.layers.resize_bilinear(x1, out_shape=[h, w]) - out2 = fluid.layers.resize_bilinear(x2, out_shape=[h, w]) - out3 = fluid.layers.resize_bilinear(x3, out_shape=[h, w]) - out.append(out1) - out.append(out2) - out.append(out3) - return out - - -def copy_model(path, new_path): - shutil.rmtree(new_path, ignore_errors=True) - shutil.copytree(path, new_path) - model_path = os.path.join(new_path, '__model__') - if os.path.exists(model_path): - os.remove(model_path) - - -if __name__ == '__main__': - import numpy as np - - with fluid.dygraph.guard(fluid.CPUPlace()): - x = np.random.randn(2, 3, 224, 224).astype('float32') - x = fluid.dygraph.to_variable(x) - model = DANet('test', backbone='resnet101', num_classes=19, batch_size=2) - y = model(x) - print(y[0].shape) diff --git a/PaddleCV/Research/danet/dataset/.gitkeep b/PaddleCV/Research/danet/dataset/.gitkeep deleted file mode 100644 index 8b137891791fe96927ad78e64b0aad7bded08bdc..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/dataset/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/PaddleCV/Research/danet/eval.py b/PaddleCV/Research/danet/eval.py deleted file mode 100644 index 46c825fabb71e8fee5834d2c09d5a2332833e007..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/eval.py +++ /dev/null @@ -1,410 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" -os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = "0.99" - -import paddle.fluid as fluid -import paddle -import logging -import math -import numpy as np -import shutil -import os - -from PIL import ImageOps, Image, ImageEnhance, ImageFilter -from datetime import datetime - -from danet import DANet -from options import Options -from utils.cityscapes_data import cityscapes_train -from utils.cityscapes_data import cityscapes_val -from utils.cityscapes_data import cityscapes_test -from utils.lr_scheduler import Lr -from iou import IOUMetric - -# globals -data_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) -data_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) - - -def pad_single_image(image, crop_size): - w, h = image.size - pad_h = crop_size - h if h < crop_size else 0 - pad_w = crop_size - w if w < crop_size else 0 - image = ImageOps.expand(image, border=(0, 0, pad_w, pad_h), fill=0) - assert (image.size[0] >= crop_size and image.size[1] >= crop_size) - return image - - -def crop_image(image, h0, w0, h1, w1): - return image.crop((w0, h0, w1, h1)) - - -def flip_left_right_image(image): - return image.transpose(Image.FLIP_LEFT_RIGHT) - - -def resize_image(image, out_h, out_w, mode=Image.BILINEAR): - return image.resize((out_w, out_h), mode) - - -def mapper_image(image): - image_array = np.array(image) - image_array = image_array.transpose((2, 0, 1)) - image_array = image_array / 255.0 - image_array = (image_array - data_mean) / data_std - image_array = image_array.astype('float32') - image_array = image_array[np.newaxis, :] - return image_array - - -def get_model(args): - model = DANet('DANet', - backbone=args.backbone, - num_classes=args.num_classes, - batch_size=1, - 
dilated=args.dilated, - multi_grid=args.multi_grid, - multi_dilation=args.multi_dilation) - return model - - -def copy_model(path, new_path): - shutil.rmtree(new_path, ignore_errors=True) - shutil.copytree(path, new_path) - model_path = os.path.join(new_path, '__model__') - if os.path.exists(model_path): - os.remove(model_path) - - -def mean_iou(pred, label, num_classes=19): - label = fluid.layers.elementwise_min(fluid.layers.cast(label, np.int32), - fluid.layers.assign(np.array([num_classes], dtype=np.int32))) - label_ig = (label == num_classes).astype('int32') - label_ng = (label != num_classes).astype('int32') - pred = fluid.layers.cast(fluid.layers.argmax(pred, axis=1), 'int32') - pred = pred * label_ng + label_ig * num_classes - miou, wrong, correct = fluid.layers.mean_iou(pred, label, num_classes + 1) - label.stop_gradient = True - return miou, wrong, correct - - -def change_model_executor_to_dygraph(args): - temp_image = fluid.layers.data(name='temp_image', shape=[3, 224, 224], dtype='float32') - model = get_model(args) - y = model(temp_image) - if args.cuda: - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if args.cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - model_path = args.save_model - assert os.path.exists(model_path), "Please check whether the executor model file address {} exists. 
" \ - "Note: the executor model file is multiple files.".format(model_path) - fluid.io.load_persistables(exe, model_path, fluid.default_main_program()) - print('load executor train model successful, start change!') - param_list = fluid.default_main_program().block(0).all_parameters() - param_name_list = [p.name for p in param_list] - temp_dict = {} - for name in param_name_list: - tensor = fluid.global_scope().find_var(name).get_tensor() - npt = np.asarray(tensor) - temp_dict[name] = npt - del model - with fluid.dygraph.guard(): - x = np.random.randn(1, 3, 224, 224).astype('float32') - x = fluid.dygraph.to_variable(x) - model = get_model(args) - y = model(x) - new_param_dict = {} - for k, v in temp_dict.items(): - value = v - value_shape = value.shape - name = k - tensor = fluid.layers.create_parameter(shape=value_shape, - name=name, - dtype='float32', - default_initializer=fluid.initializer.NumpyArrayInitializer(value)) - new_param_dict[name] = tensor - assert len(new_param_dict) == len( - model.state_dict()), "The number of parameters is not equal. Loading parameters failed, " \ - "Please check whether the model is consistent!" 
- model.set_dict(new_param_dict) - fluid.save_dygraph(model.state_dict(), model_path) - del model - del temp_dict - print('change executor model to dygraph successful!') - - -def eval(args): - if args.change_executor_to_dygraph: - change_model_executor_to_dygraph(args) - with fluid.dygraph.guard(): - num_classes = args.num_classes - base_size = args.base_size - crop_size = args.crop_size - multi_scales = args.multi_scales - flip = args.flip - - if not multi_scales: - scales = [1.0] - else: - # scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.2] - scales = [0.5, 0.75, 1.0, 1.25, 1.35, 1.5, 1.75, 2.0, 2.2] # It might work better - - if len(scales) == 1: # single scale - # stride_rate = 2.0 / 3.0 - stride_rate = 1.0 / 2.0 # It might work better - else: - stride_rate = 1.0 / 2.0 - stride = int(crop_size * stride_rate) # slid stride - - model = get_model(args) - x = np.random.randn(1, 3, 224, 224).astype('float32') - x = fluid.dygraph.to_variable(x) - y = model(x) - iou = IOUMetric(num_classes) - model_path = args.save_model - # load_better_model - if paddle.__version__ == '1.5.2' and args.load_better_model: - assert os.path.exists(model_path), "your input save_model: {} ,but '{}' is not exists".format( - model_path, model_path) - print('better model exist!') - new_model_path = 'dygraph/' + model_path - copy_model(model_path, new_model_path) - model_param, _ = fluid.dygraph.load_persistables(new_model_path) - model.load_dict(model_param) - elif args.load_better_model: - assert os.path.exists(model_path + '.pdparams'), "your input save_model: {} ,but '{}' is not exists".format( - model_path, model_path + '.pdparams') - print('better model exist!') - model_param, _ = fluid.dygraph.load_dygraph(model_path) - model.load_dict(model_param) - else: - raise ValueError('Please set --load_better_model!') - - assert len(model_param) == len( - model.state_dict()), "The number of parameters is not equal. 
Loading parameters failed, " \ - "Please check whether the model is consistent!" - model.eval() - - prev_time = datetime.now() - # reader = cityscapes_test(split='test', base_size=2048, crop_size=1024, scale=True, xmap=True) - reader = cityscapes_test(split='val', base_size=2048, crop_size=1024, scale=True, xmap=True) - - print('MultiEvalModule: base_size {}, crop_size {}'. - format(base_size, crop_size)) - print('scales: {}'.format(scales)) - print('val ing...') - logging.basicConfig(level=logging.INFO, - filename='DANet_{}_eval_dygraph.log'.format(args.backbone), - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - logging.info('DANet') - logging.info(args) - palette = pat() - for data in reader(): - image = data[0] - label_path = data[1] # val_label is a picture, test_label is a path - label = Image.open(label_path, mode='r') # val_label is a picture, test_label is a path - save_png_path = label_path.replace('val', '{}_val'.format(args.backbone)).replace('test', '{}_test'.format( - args.backbone)) - label_np = np.array(label) - w, h = image.size # h 1024, w 2048 - scores = np.zeros(shape=[num_classes, h, w], dtype='float32') - for scale in scales: - long_size = int(math.ceil(base_size * scale)) # long_size - if h > w: - height = long_size - width = int(1.0 * w * long_size / h + 0.5) - short_size = width - else: - width = long_size - height = int(1.0 * h * long_size / w + 0.5) - short_size = height - - cur_img = resize_image(image, height, width) - # pad - if long_size <= crop_size: - pad_img = pad_single_image(cur_img, crop_size) - pad_img = mapper_image(pad_img) - pad_img = fluid.dygraph.to_variable(pad_img) - pred1, pred2, pred3 = model(pad_img) - pred1 = pred1.numpy() - outputs = pred1[:, :, :height, :width] - if flip: - pad_img_filp = flip_left_right_image(cur_img) - pad_img_filp = pad_single_image(pad_img_filp, crop_size) # pad - pad_img_filp = mapper_image(pad_img_filp) - pad_img_filp = fluid.dygraph.to_variable(pad_img_filp) - pred1, pred2, 
pred3 = model(pad_img_filp) - pred1 = fluid.layers.reverse(pred1, axis=3) - pred1 = pred1.numpy() - outputs += pred1[:, :, :height, :width] - else: - if short_size < crop_size: - # pad if needed - pad_img = pad_single_image(cur_img, crop_size) - else: - pad_img = cur_img - pw, ph = pad_img.size - assert (ph >= height and pw >= width) - - # slid window - h_grids = int(math.ceil(1.0 * (ph - crop_size) / stride)) + 1 - w_grids = int(math.ceil(1.0 * (pw - crop_size) / stride)) + 1 - outputs = np.zeros(shape=[1, num_classes, ph, pw], dtype='float32') - count_norm = np.zeros(shape=[1, 1, ph, pw], dtype='int32') - for idh in range(h_grids): - for idw in range(w_grids): - h0 = idh * stride - w0 = idw * stride - h1 = min(h0 + crop_size, ph) - w1 = min(w0 + crop_size, pw) - crop_img = crop_image(pad_img, h0, w0, h1, w1) - pad_crop_img = pad_single_image(crop_img, crop_size) - pad_crop_img = mapper_image(pad_crop_img) - pad_crop_img = fluid.dygraph.to_variable(pad_crop_img) - pred1, pred2, pred3 = model(pad_crop_img) # shape [1, num_class, h, w] - pred = pred1.numpy() # channel, h, w - outputs[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1 - h0, 0:w1 - w0] - count_norm[:, :, h0:h1, w0:w1] += 1 - if flip: - pad_img_filp = flip_left_right_image(crop_img) - pad_img_filp = pad_single_image(pad_img_filp, crop_size) # pad - pad_img_array = mapper_image(pad_img_filp) - pad_img_array = fluid.dygraph.to_variable(pad_img_array) - pred1, pred2, pred3 = model(pad_img_array) - pred1 = fluid.layers.reverse(pred1, axis=3) - pred = pred1.numpy() - outputs[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1 - h0, 0:w1 - w0] - count_norm[:, :, h0:h1, w0:w1] += 1 - assert ((count_norm == 0).sum() == 0) - outputs = outputs / count_norm - outputs = outputs[:, :, :height, :width] - outputs = fluid.dygraph.to_variable(outputs) - outputs = fluid.layers.resize_bilinear(outputs, out_shape=[h, w]) - score = outputs.numpy()[0] - scores += score # the sum of all scales, shape: [channel, h, w] - pred = np.argmax(score, 
axis=0).astype('uint8') - picture_path = '{}'.format(save_png_path).replace('.png', '_scale_{}'.format(scale)) - save_png(pred, palette, picture_path) - pred = np.argmax(scores, axis=0).astype('uint8') - picture_path = '{}'.format(save_png_path).replace('.png', '_scores') - save_png(pred, palette, picture_path) - iou.add_batch(pred, label_np) # cal iou - print('eval done!') - logging.info('eval done!') - acc, acc_cls, iu, mean_iu, fwavacc, kappa = iou.evaluate() - print('acc = {}'.format(acc)) - logging.info('acc = {}'.format(acc)) - print('acc_cls = {}'.format(acc_cls)) - logging.info('acc_cls = {}'.format(acc_cls)) - print('iu = {}'.format(iu)) - logging.info('iu = {}'.format(iu)) - print('mean_iou -- 255 = {}'.format(mean_iu)) - logging.info('mean_iou --255 = {}'.format(mean_iu)) - print('mean_iou = {}'.format(np.nanmean(iu[:-1]))) # realy iou - logging.info('mean_iou = {}'.format(np.nanmean(iu[:-1]))) - print('fwavacc = {}'.format(fwavacc)) - logging.info('fwavacc = {}'.format(fwavacc)) - print('kappa = {}'.format(kappa)) - logging.info('kappa = {}'.format(kappa)) - cur_time = datetime.now() - h, remainder = divmod((cur_time - prev_time).seconds, 3600) - m, s = divmod(remainder, 60) - time_str = "Time %02d:%02d:%02d" % (h, m, s) - print('val ' + time_str) - logging.info('val ' + time_str) - - -def save_png(pred_value, palette, name): - if isinstance(pred_value, np.ndarray): - if pred_value.ndim == 3: - batch_size = pred_value.shape[0] - if batch_size == 1: - pred_value = pred_value.squeeze(axis=0) - image = Image.fromarray(pred_value).convert('P') - image.putpalette(palette) - save_path = '{}.png'.format(name) - save_dir = os.path.dirname(save_path) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - image.save(save_path) - else: - for batch_id in range(batch_size): - value = pred_value[batch_id] - image = Image.fromarray(value).convert('P') - image.putpalette(palette) - save_path = '{}.png'.format(name[batch_id]) - save_dir = 
os.path.dirname(save_path) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - image.save(save_path) - elif pred_value.ndim == 2: - image = Image.fromarray(pred_value).convert('P') - image.putpalette(palette) - save_path = '{}.png'.format(name) - save_dir = os.path.dirname(save_path) - if not os.path.exists(save_dir): - os.makedirs(save_dir) - image.save(save_path) - else: - raise ValueError('Only support nd-array') - - -def save_png_test(path): - im = Image.open(path) - im_array = np.array(im).astype('uint8') - save_png(im_array, pat(), 'save_png_test') - - -def pat(): - palette = [] - for i in range(256): - palette.extend((i, i, i)) - palette[:3 * 19] = np.array([[128, 64, 128], - [244, 35, 232], - [70, 70, 70], - [102, 102, 156], - [190, 153, 153], - [153, 153, 153], - [250, 170, 30], - [220, 220, 0], - [107, 142, 35], - [152, 251, 152], - [70, 130, 180], - [220, 20, 60], - [255, 0, 0], - [0, 0, 142], - [0, 0, 70], - [0, 60, 100], - [0, 80, 100], - [0, 0, 230], - [119, 11, 32]], dtype='uint8').flatten() - return palette - - -if __name__ == '__main__': - options = Options() - args = options.parse() - options.print_args() - eval(args) - diff --git a/PaddleCV/Research/danet/img/Network.png b/PaddleCV/Research/danet/img/Network.png deleted file mode 100644 index ac109b403a122a0241cb391c2d17b45ca43cb41b..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/danet/img/Network.png and /dev/null differ diff --git a/PaddleCV/Research/danet/img/channel.png b/PaddleCV/Research/danet/img/channel.png deleted file mode 100644 index eae8854c4252dec561f0b71febf5ddf1372b428c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/danet/img/channel.png and /dev/null differ diff --git a/PaddleCV/Research/danet/img/position.png b/PaddleCV/Research/danet/img/position.png deleted file mode 100644 index b46f9e1751783eb338b4554da70696df5e411457..0000000000000000000000000000000000000000 Binary files 
a/PaddleCV/Research/danet/img/position.png and /dev/null differ diff --git a/PaddleCV/Research/danet/img/val_1.png b/PaddleCV/Research/danet/img/val_1.png deleted file mode 100644 index 4f4610d36f3d16ec669a89aaaf6ee71b24982435..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/danet/img/val_1.png and /dev/null differ diff --git a/PaddleCV/Research/danet/img/val_gt.png b/PaddleCV/Research/danet/img/val_gt.png deleted file mode 100644 index 5a0d27351a66a0cab3f885f86e42141a7f96b06d..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/danet/img/val_gt.png and /dev/null differ diff --git a/PaddleCV/Research/danet/img/val_output.png b/PaddleCV/Research/danet/img/val_output.png deleted file mode 100644 index 3d9ee2191629b8dad656672e99716e1bcb6f720c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/danet/img/val_output.png and /dev/null differ diff --git a/PaddleCV/Research/danet/iou.py b/PaddleCV/Research/danet/iou.py deleted file mode 100644 index 1f560a3041c29f47deb70a7eecbe937d1d096317..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/iou.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -class IOUMetric(object): - - def __init__(self, num_classes): - self.num_classes = num_classes + 1 - self.hist = np.zeros((num_classes + 1, num_classes + 1)) - - def _fast_hist(self, label_pred, label_true): - mask = (label_true >= 0) & (label_true < self.num_classes) - hist = np.bincount( - self.num_classes * label_true[mask].astype(int) + - label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes) - return hist - - def add_batch(self, predictions, gts): - # gts = BHW - # predictions = BHW - if isinstance(gts, np.ndarray): - gts_ig = (gts == 255).astype(np.int32) - gts_nig = (gts != 255).astype(np.int32) - # print(predictions) - gts[gts == 255] = self.num_classes - 1 # 19 - predictions = gts_nig * predictions + gts_ig * (self.num_classes - 1) - # print(predictions) - for lp, lt in zip(predictions, gts): - self.hist += self._fast_hist(lp.flatten(), lt.flatten()) - - def evaluate(self): - acc = np.diag(self.hist).sum() / self.hist.sum() - acc_cls = np.nanmean(np.diag(self.hist) / self.hist.sum(axis=1)) - iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) - mean_iu = np.nanmean(iu) - freq = self.hist.sum(axis=1) / self.hist.sum() - fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() - kappa = (self.hist.sum() * np.diag(self.hist).sum() - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) / ( - self.hist.sum() ** 2 - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) - return acc, acc_cls, iu, mean_iu, fwavacc, kappa - - def evaluate_kappa(self): - kappa = (self.hist.sum() * np.diag(self.hist).sum() - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) / ( - self.hist.sum() ** 2 - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) - return kappa - - def evaluate_iou_kappa(self): - iu = np.diag(self.hist) / (self.hist.sum(axis=1) + 
self.hist.sum(axis=0) - np.diag(self.hist)) - mean_iu = np.nanmean(iu) - kappa = (self.hist.sum() * np.diag(self.hist).sum() - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) / ( - self.hist.sum() ** 2 - (self.hist.sum(axis=0) * self.hist.sum(axis=1)).sum()) - return mean_iu, kappa - - def evaluate_iu(self): - iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) - return iu - diff --git a/PaddleCV/Research/danet/options.py b/PaddleCV/Research/danet/options.py deleted file mode 100644 index 40f73feef8ae2ee53491c506cba8cb5232e0e4c8..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/options.py +++ /dev/null @@ -1,176 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import argparse - - -class Options(object): - def __init__(self): - parser = argparse.ArgumentParser(description='Paddle DANet Segmentation') - - # model and dataset - parser.add_argument('--model', type=str, default='danet', - help='model name (default: danet)') - parser.add_argument('--backbone', type=str, default='resnet101', - help='backbone name (default: resnet101)') - parser.add_argument('--dataset', type=str, default='cityscapes', - help='dataset name (default: cityscapes)') - parser.add_argument('--num_classes', type=int, default=19, - help='num_classes (default: cityscapes = 19)') - parser.add_argument('--data_folder', type=str, - default='./dataset', - help='training dataset folder (default: ./dataset') - parser.add_argument('--base_size', type=int, default=1024, - help='base image size') - parser.add_argument('--crop_size', type=int, default=768, - help='crop image size') - - # training hyper params - parser.add_argument('--epoch_num', type=int, default=None, metavar='N', - help='number of epochs to train (default: auto)') - parser.add_argument('--start_epoch', type=int, default=0, - metavar='N', help='start epochs (default:0)') - parser.add_argument('--batch_size', type=int, default=None, - metavar='N', help='input batch size for \ - training (default: auto)') - parser.add_argument('--test_batch_size', type=int, default=None, - metavar='N', help='input batch size for \ - testing (default: same as batch size)') - - # optimizer params - parser.add_argument('--lr', type=float, default=None, metavar='LR', - help='learning rate (default: auto)') - parser.add_argument('--lr_scheduler', type=str, default='poly', - help='learning rate scheduler (default: poly)') - parser.add_argument('--lr_pow', type=float, default=0.9, - help='learning rate scheduler (default: 0.9)') - parser.add_argument('--lr_step', type=int, default=None, - help='lr 
step to change lr') - parser.add_argument('--warm_up', action='store_true', default=False, - help='warm_up (default: False)') - parser.add_argument('--warmup_epoch', type=int, default=5, - help='warmup_epoch (default: 5)') - parser.add_argument('--total_step', type=int, default=None, - metavar='N', help='total_step (default: auto)') - parser.add_argument('--step_per_epoch', type=int, default=None, - metavar='N', help='step_per_epoch (default: auto)') - parser.add_argument('--momentum', type=float, default=0.9, - metavar='M', help='momentum (default: 0.9)') - parser.add_argument('--weight_decay', type=float, default=1e-4, - metavar='M', help='w-decay (default: 1e-4)') - - # cuda, seed and logging - parser.add_argument('--cuda', action='store_true', default=False, - help='use CUDA training, (default: False)') - parser.add_argument('--use_data_parallel', action='store_true', default=False, - help='use data_parallel training, (default: False)') - parser.add_argument('--seed', type=int, default=1, metavar='S', - help='random seed (default: 1)') - parser.add_argument('--log_root', type=str, - default='./', help='set a log path folder') - - # checkpoint - parser.add_argument("--save_model", default='checkpoint/DANet101_better_model_paddle1.6', type=str, - help="model path, (default: checkpoint/DANet101_better_model_paddle1.6)") - - # change executor model params to dygraph model params - parser.add_argument("--change_executor_to_dygraph", action='store_true', default=False, - help="change executor model params to dygraph model params (default:False)") - - # finetuning pre-trained models - parser.add_argument("--load_pretrained_model", action='store_true', default=False, - help="load pretrained model (default: False)") - # load better models - parser.add_argument("--load_better_model", action='store_true', default=False, - help="load better model (default: False)") - parser.add_argument('--multi_scales', action='store_true', default=False, - help="testing scale, (default: 
False)") - parser.add_argument('--flip', action='store_true', default=False, - help="testing flip image, (default: False)") - - # multi grid dilation option - parser.add_argument("--dilated", action='store_true', default=False, - help="use dilation policy, (default: False)") - parser.add_argument("--multi_grid", action='store_true', default=False, - help="use multi grid dilation policy, default: False") - parser.add_argument('--multi_dilation', nargs='+', type=int, default=None, - help="multi grid dilation list, (default: None), can use --mutil_dilation 4 8 16") - parser.add_argument('--scale', action='store_true', default=False, - help='choose to use random scale transform(0.75-2.0) for train, (default: False)') - - # the parser - self.parser = parser - - def parse(self): - args = self.parser.parse_args() - # default settings for epochs, batch_size and lr - if args.epoch_num is None: - epoches = { - 'pascal_voc': 180, - 'pascal_aug': 180, - 'pcontext': 180, - 'ade20k': 180, - 'cityscapes': 350, - } - num_class_dict = { - 'pascal_voc': 21, - 'pascal_aug': 21, - 'pcontext': 21, - 'ade20k': None, - 'cityscapes': 19, - } - total_steps = { - 'pascal_voc': 200000, - 'pascal_aug': 500000, - 'pcontext': 500000, - 'ade20k': 500000, - 'cityscapes': 150000, - } - args.epoch_num = epoches[args.dataset.lower()] - args.num_classes = num_class_dict[args.dataset.lower()] - args.total_step = total_steps[args.dataset.lower()] - if args.batch_size is None: - args.batch_size = 2 - if args.test_batch_size is None: - args.test_batch_size = args.batch_size - if args.step_per_epoch is None: - step_per_epoch = { - 'pascal_voc': 185, - 'pascal_aug': 185, - 'pcontext': 185, - 'ade20k': 185, - 'cityscapes': 371, # 2975 // batch_size // GPU_num - } - args.step_per_epoch = step_per_epoch[args.dataset.lower()] - if args.lr is None: - lrs = { - 'pascal_voc': 0.0001, - 'pascal_aug': 0.001, - 'pcontext': 0.001, - 'ade20k': 0.01, - 'cityscapes': 0.003, - } - args.lr = lrs[args.dataset.lower()] / 8 
* args.batch_size - return args - - def print_args(self): - arg_dict = self.parse().__dict__ - for k, v in arg_dict.items(): - print('{:30s}: {}'.format(k, v)) - diff --git a/PaddleCV/Research/danet/train_dygraph.py b/PaddleCV/Research/danet/train_dygraph.py deleted file mode 100644 index df610999e5eaff47aafe3e53b18833b1eb73b576..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/train_dygraph.py +++ /dev/null @@ -1,353 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" -os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = "0.99" - -import paddle.fluid as fluid -import numpy as np -import random -import paddle -import logging -import shutil -import multiprocessing -import sys -from datetime import datetime -from paddle.utils import Ploter - -from danet import DANet -from options import Options -from utils.cityscapes_data import cityscapes_train -from utils.cityscapes_data import cityscapes_val -from utils.lr_scheduler import Lr -import matplotlib - -matplotlib.use('Agg') - - -def get_model(args): - model = DANet('DANet', - backbone=args.backbone, - num_classes=args.num_classes, - batch_size=args.batch_size, - dilated=args.dilated, - multi_grid=args.multi_grid, - multi_dilation=args.multi_dilation) - return model - - -def _cpu_num(): - if "CPU_NUM" not in os.environ.keys(): - if multiprocessing.cpu_count() > 1: - sys.stderr.write( - '!!! The CPU_NUM is not specified, you should set CPU_NUM in the environment variable list.\n' - 'CPU_NUM indicates that how many CPUPlace are used in the current task.\n' - 'And if this parameter are set as N (equal to the number of physical CPU core) the program may be faster.\n\n' - 'export CPU_NUM={} # for example, set CPU_NUM as number of physical CPU core which is {}.\n\n' - '!!! 
The default number of CPU_NUM=1.\n'.format( - multiprocessing.cpu_count(), multiprocessing.cpu_count())) - os.environ['CPU_NUM'] = str(1) - cpu_num = os.environ.get('CPU_NUM') - return int(cpu_num) - - -def mean_iou(pred, label, num_classes=19): - label = fluid.layers.elementwise_min(fluid.layers.cast(label, np.int32), - fluid.layers.assign(np.array([num_classes], dtype=np.int32))) - label_ig = (label == num_classes).astype('int32') - label_ng = (label != num_classes).astype('int32') - pred = fluid.layers.cast(fluid.layers.argmax(pred, axis=1), 'int32') - pred = pred * label_ng + label_ig * num_classes - miou, wrong, correct = fluid.layers.mean_iou(pred, label, num_classes + 1) - label.stop_gradient = True - return miou, wrong, correct - - -def loss_fn(pred, pred2, pred3, label, num_classes=19): - pred = fluid.layers.transpose(pred, perm=[0, 2, 3, 1]) - pred = fluid.layers.reshape(pred, [-1, num_classes]) - - pred2 = fluid.layers.transpose(pred2, perm=[0, 2, 3, 1]) - pred2 = fluid.layers.reshape(pred2, [-1, num_classes]) - - pred3 = fluid.layers.transpose(pred3, perm=[0, 2, 3, 1]) - pred3 = fluid.layers.reshape(pred3, [-1, num_classes]) - - label = fluid.layers.reshape(label, [-1, 1]) - - pred = fluid.layers.softmax(pred, use_cudnn=False) - loss1 = fluid.layers.cross_entropy(pred, label, ignore_index=255) - - pred2 = fluid.layers.softmax(pred2, use_cudnn=False) - loss2 = fluid.layers.cross_entropy(pred2, label, ignore_index=255) - - pred3 = fluid.layers.softmax(pred3, use_cudnn=False) - loss3 = fluid.layers.cross_entropy(pred3, label, ignore_index=255) - - label.stop_gradient = True - return loss1 + loss2 + loss3 - - -def optimizer_setting(args): - if args.weight_decay is not None: - regular = fluid.regularizer.L2Decay(regularization_coeff=args.weight_decay) - else: - regular = None - if args.lr_scheduler == 'poly': - lr_scheduler = Lr(lr_policy='poly', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - power=args.lr_pow, - 
warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch) - decayed_lr = lr_scheduler.get_lr() - elif args.lr_scheduler == 'cosine': - lr_scheduler = Lr(lr_policy='cosine', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch) - decayed_lr = lr_scheduler.get_lr() - elif args.lr_scheduler == 'piecewise': - lr_scheduler = Lr(lr_policy='piecewise', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch, - decay_epoch=[50, 100, 150], - gamma=0.1) - decayed_lr = lr_scheduler.get_lr() - else: - decayed_lr = args.lr - return fluid.optimizer.MomentumOptimizer(learning_rate=decayed_lr, - momentum=args.momentum, - regularization=regular) - - -def main(args): - batch_size = args.batch_size - num_epochs = args.epoch_num - num_classes = args.num_classes - data_root = args.data_folder - if args.cuda: - num = fluid.core.get_cuda_device_count() - print('The number of GPU: {}'.format(num)) - else: - num = _cpu_num() - print('The number of CPU: {}'.format(num)) - - # program - start_prog = fluid.default_startup_program() - train_prog = fluid.default_main_program() - - start_prog.random_seed = args.seed - train_prog.random_seed = args.seed - np.random.seed(args.seed) - random.seed(args.seed) - - logging.basicConfig(level=logging.INFO, - filename='DANet_{}_train_dygraph.log'.format(args.backbone), - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - logging.info('DANet') - logging.info(args) - - if args.cuda: - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - - place = fluid.CUDAPlace(gpu_id) if args.cuda else fluid.CPUPlace() - train_loss_title = 'Train_loss' - test_loss_title = 'Test_loss' - - train_iou_title = 'Train_mIOU' - test_iou_title = 'Test_mIOU' - - plot_loss = Ploter(train_loss_title, test_loss_title) - plot_iou = Ploter(train_iou_title, test_iou_title) - - with 
fluid.dygraph.guard(place): - - model = get_model(args) - x = np.random.randn(batch_size, 3, 224, 224).astype('float32') - x = fluid.dygraph.to_variable(x) - model(x) - - # load_pretrained_model - if args.load_pretrained_model: - save_dir = args.save_model - assert os.path.exists(save_dir + '.pdparams'), "your input save_model: {} ,but '{}' is not exists".format( - save_dir, save_dir + '.pdparams') - param, _ = fluid.load_dygraph(save_dir) - model.set_dict(param) - assert len(param) == len( - model.state_dict()), "The number of parameters is not equal. Loading parameters failed, " \ - "Please check whether the model is consistent!" - print('load pretrained model!') - - # load_better_model - if args.load_better_model: - save_dir = args.save_model - assert os.path.exists(save_dir + '.pdparams'), "your input save_model: {} ,but '{}' is not exists".format( - save_dir, save_dir + '.pdparams') - param, _ = fluid.load_dygraph(save_dir) - model.set_dict(param) - assert len(param) == len( - model.state_dict()), "The number of parameters is not equal. Loading parameters failed, " \ - "Please check whether the model is consistent!" 
- print('load better model!') - - optimizer = optimizer_setting(args) - train_data = cityscapes_train(data_root=data_root, - base_size=args.base_size, - crop_size=args.crop_size, - scale=args.scale, - xmap=True, - batch_size=batch_size, - gpu_num=num) - batch_train_data = paddle.batch(paddle.reader.shuffle( - train_data, buf_size=batch_size * 64), - batch_size=batch_size, - drop_last=True) - - val_data = cityscapes_val(data_root=data_root, - base_size=args.base_size, - crop_size=args.crop_size, - scale=args.scale, - xmap=True) - batch_test_data = paddle.batch(val_data, - batch_size=batch_size, - drop_last=True) - - train_iou_manager = fluid.metrics.Accuracy() - train_avg_loss_manager = fluid.metrics.Accuracy() - test_iou_manager = fluid.metrics.Accuracy() - test_avg_loss_manager = fluid.metrics.Accuracy() - - better_miou_train = 0 - better_miou_test = 0 - - for epoch in range(num_epochs): - prev_time = datetime.now() - train_avg_loss_manager.reset() - train_iou_manager.reset() - for batch_id, data in enumerate(batch_train_data()): - image = np.array([x[0] for x in data]).astype('float32') - label = np.array([x[1] for x in data]).astype('int64') - - image = fluid.dygraph.to_variable(image) - label = fluid.dygraph.to_variable(label) - label.stop_gradient = True - pred, pred2, pred3 = model(image) - train_loss = loss_fn(pred, pred2, pred3, label, num_classes=num_classes) - train_avg_loss = fluid.layers.mean(train_loss) - miou, wrong, correct = mean_iou(pred, label, num_classes=num_classes) - train_avg_loss.backward() - optimizer.minimize(train_avg_loss) - model.clear_gradients() - train_iou_manager.update(miou.numpy(), weight=int(batch_size * num)) - train_avg_loss_manager.update(train_avg_loss.numpy(), weight=int(batch_size * num)) - batch_train_str = "epoch: {}, batch: {}, train_avg_loss: {:.6f}, " \ - "train_miou: {:.6f}.".format(epoch + 1, - batch_id + 1, - train_avg_loss.numpy()[0], - miou.numpy()[0]) - if batch_id % 100 == 0: - logging.info(batch_train_str) - 
print(batch_train_str) - cur_time = datetime.now() - h, remainder = divmod((cur_time - prev_time).seconds, 3600) - m, s = divmod(remainder, 60) - time_str = " Time %02d:%02d:%02d" % (h, m, s) - train_str = "\nepoch: {}, train_avg_loss: {:.6f}, " \ - "train_miou: {:.6f}.".format(epoch + 1, - train_avg_loss_manager.eval()[0], - train_iou_manager.eval()[0]) - print(train_str + time_str + '\n') - logging.info(train_str + time_str + '\n') - plot_loss.append(train_loss_title, epoch, train_avg_loss_manager.eval()[0]) - plot_loss.plot('./DANet_loss_dygraph.jpg') - plot_iou.append(train_iou_title, epoch, train_iou_manager.eval()[0]) - plot_iou.plot('./DANet_miou_dygraph.jpg') - fluid.dygraph.save_dygraph(model.state_dict(), 'checkpoint/DANet_epoch_new') - # save_model - if better_miou_train < train_iou_manager.eval()[0]: - shutil.rmtree('checkpoint/DANet_better_train_{:.4f}.pdparams'.format(better_miou_train), - ignore_errors=True) - better_miou_train = train_iou_manager.eval()[0] - fluid.dygraph.save_dygraph(model.state_dict(), - 'checkpoint/DANet_better_train_{:.4f}'.format(better_miou_train)) - - ########## test ############ - model.eval() - test_iou_manager.reset() - test_avg_loss_manager.reset() - prev_time = datetime.now() - for (batch_id, data) in enumerate(batch_test_data()): - image = np.array([x[0] for x in data]).astype('float32') - label = np.array([x[1] for x in data]).astype('int64') - - image = fluid.dygraph.to_variable(image) - label = fluid.dygraph.to_variable(label) - - label.stop_gradient = True - pred, pred2, pred3 = model(image) - test_loss = loss_fn(pred, pred2, pred3, label, num_classes=num_classes) - test_avg_loss = fluid.layers.mean(test_loss) - miou, wrong, correct = mean_iou(pred, label, num_classes=num_classes) - test_iou_manager.update(miou.numpy(), weight=int(batch_size * num)) - test_avg_loss_manager.update(test_avg_loss.numpy(), weight=int(batch_size * num)) - batch_test_str = "epoch: {}, batch: {}, test_avg_loss: {:.6f}, " \ - "test_miou: 
{:.6f}.".format(epoch + 1, batch_id + 1, - test_avg_loss.numpy()[0], - miou.numpy()[0]) - if batch_id % 20 == 0: - logging.info(batch_test_str) - print(batch_test_str) - cur_time = datetime.now() - h, remainder = divmod((cur_time - prev_time).seconds, 3600) - m, s = divmod(remainder, 60) - time_str = " Time %02d:%02d:%02d" % (h, m, s) - test_str = "\nepoch: {}, test_avg_loss: {:.6f}, " \ - "test_miou: {:.6f}.".format(epoch + 1, - test_avg_loss_manager.eval()[0], - test_iou_manager.eval()[0]) - print(test_str + time_str + '\n') - logging.info(test_str + time_str + '\n') - plot_loss.append(test_loss_title, epoch, test_avg_loss_manager.eval()[0]) - plot_loss.plot('./DANet_loss_dygraph.jpg') - plot_iou.append(test_iou_title, epoch, test_iou_manager.eval()[0]) - plot_iou.plot('./DANet_miou_dygraph.jpg') - model.train() - # save_model - if better_miou_test < test_iou_manager.eval()[0]: - shutil.rmtree('checkpoint/DANet_better_test_{:.4f}.pdparams'.format(better_miou_test), - ignore_errors=True) - better_miou_test = test_iou_manager.eval()[0] - fluid.dygraph.save_dygraph(model.state_dict(), - 'checkpoint/DANet_better_test_{:.4f}'.format(better_miou_test)) - - -if __name__ == '__main__': - options = Options() - args = options.parse() - options.print_args() - main(args) diff --git a/PaddleCV/Research/danet/train_executor.py b/PaddleCV/Research/danet/train_executor.py deleted file mode 100644 index 82f451dd168527886081c684686dd271a9e3c38c..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/train_executor.py +++ /dev/null @@ -1,423 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" -os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = "0.99" - -import paddle.fluid as fluid -import numpy as np -import random -import paddle -import logging -import shutil -import multiprocessing -import sys -from datetime import datetime -from paddle.utils import Ploter - -from danet import DANet -from options import Options -from utils.cityscapes_data import cityscapes_train -from utils.cityscapes_data import cityscapes_val -from utils.lr_scheduler import Lr -import matplotlib - -matplotlib.use('Agg') - - -def get_model(args): - model = DANet('DANet', - backbone=args.backbone, - num_classes=args.num_classes, - batch_size=args.batch_size, - dilated=args.dilated, - multi_grid=args.multi_grid, - multi_dilation=args.multi_dilation) - return model - - -def _cpu_num(): - if "CPU_NUM" not in os.environ.keys(): - if multiprocessing.cpu_count() > 1: - sys.stderr.write( - '!!! The CPU_NUM is not specified, you should set CPU_NUM in the environment variable list.\n' - 'CPU_NUM indicates that how many CPUPlace are used in the current task.\n' - 'And if this parameter are set as N (equal to the number of physical CPU core) the program may be faster.\n\n' - 'export CPU_NUM={} # for example, set CPU_NUM as number of physical CPU core which is {}.\n\n' - '!!! 
The default number of CPU_NUM=1.\n'.format( - multiprocessing.cpu_count(), multiprocessing.cpu_count())) - os.environ['CPU_NUM'] = str(1) - cpu_num = os.environ.get('CPU_NUM') - return int(cpu_num) - - -def mean_iou(pred, label, num_classes=19): - label = fluid.layers.elementwise_min(fluid.layers.cast(label, np.int32), - fluid.layers.assign(np.array([num_classes], dtype=np.int32))) - label_ig = (label == num_classes).astype('int32') - label_ng = (label != num_classes).astype('int32') - pred = fluid.layers.cast(fluid.layers.argmax(pred, axis=1), 'int32') - pred = pred * label_ng + label_ig * num_classes - miou, wrong, correct = fluid.layers.mean_iou(pred, label, num_classes + 1) - label.stop_gradient = True - return miou, wrong, correct - - -def loss_fn(pred, pred2, pred3, label, num_classes=19): - pred = fluid.layers.transpose(pred, perm=[0, 2, 3, 1]) - pred = fluid.layers.reshape(pred, [-1, num_classes]) - - pred2 = fluid.layers.transpose(pred2, perm=[0, 2, 3, 1]) - pred2 = fluid.layers.reshape(pred2, [-1, num_classes]) - - pred3 = fluid.layers.transpose(pred3, perm=[0, 2, 3, 1]) - pred3 = fluid.layers.reshape(pred3, [-1, num_classes]) - - label = fluid.layers.reshape(label, [-1, 1]) - - # loss1 = fluid.layers.softmax_with_cross_entropy(pred, label, ignore_index=255) - # 以上方式会出现loss为NaN的情况 - pred = fluid.layers.softmax(pred, use_cudnn=False) - loss1 = fluid.layers.cross_entropy(pred, label, ignore_index=255) - - pred2 = fluid.layers.softmax(pred2, use_cudnn=False) - loss2 = fluid.layers.cross_entropy(pred2, label, ignore_index=255) - - pred3 = fluid.layers.softmax(pred3, use_cudnn=False) - loss3 = fluid.layers.cross_entropy(pred3, label, ignore_index=255) - - label.stop_gradient = True - return loss1 + loss2 + loss3 - - -def save_model(save_dir, exe, program=None): - if os.path.exists(save_dir): - shutil.rmtree(save_dir, ignore_errors=True) - os.makedirs(save_dir) - # fluid.io.save_persistables(exe, save_dir, program) - fluid.io.save_params(exe, save_dir, program) 
- print('save: {}'.format(os.path.basename(save_dir))) - else: - os.makedirs(save_dir) - fluid.io.save_persistables(exe, save_dir, program) - print('create: {}'.format(os.path.basename(save_dir))) - - -def load_model(save_dir, exe, program=None): - if os.path.exists(save_dir): - # fluid.io.load_persistables(exe, save_dir, program) - fluid.io.load_params(exe, save_dir, program) - print('Load successful!') - else: - raise Exception('Please check the model path!') - - -def optimizer_setting(args): - if args.weight_decay is not None: - regular = fluid.regularizer.L2Decay(regularization_coeff=args.weight_decay) - else: - regular = None - if args.lr_scheduler == 'poly': - lr_scheduler = Lr(lr_policy='poly', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - power=args.lr_pow, - warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch) - decayed_lr = lr_scheduler.get_lr() - elif args.lr_scheduler == 'cosine': - lr_scheduler = Lr(lr_policy='cosine', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch) - decayed_lr = lr_scheduler.get_lr() - elif args.lr_scheduler == 'piecewise': - lr_scheduler = Lr(lr_policy='piecewise', - base_lr=args.lr, - epoch_nums=args.epoch_num, - step_per_epoch=args.step_per_epoch, - warm_up=args.warm_up, - warmup_epoch=args.warmup_epoch, - decay_epoch=[50, 100, 150], - gamma=0.1) - decayed_lr = lr_scheduler.get_lr() - else: - decayed_lr = args.lr - return fluid.optimizer.MomentumOptimizer(learning_rate=decayed_lr, - momentum=args.momentum, - regularization=regular) - - -def main(args): - image_shape = args.crop_size - image = fluid.layers.data(name='image', shape=[3, image_shape, image_shape], dtype='float32') - label = fluid.layers.data(name='label', shape=[image_shape, image_shape], dtype='int64') - - batch_size = args.batch_size - epoch_num = args.epoch_num - num_classes = args.num_classes - data_root = args.data_folder - 
if args.cuda: - num = fluid.core.get_cuda_device_count() - print('The number of GPU: {}'.format(num)) - else: - num = _cpu_num() - print('The number of CPU: {}'.format(num)) - - # program - start_prog = fluid.default_startup_program() - train_prog = fluid.default_main_program() - - start_prog.random_seed = args.seed - train_prog.random_seed = args.seed - np.random.seed(args.seed) - random.seed(args.seed) - - # clone - test_prog = train_prog.clone(for_test=True) - - logging.basicConfig(level=logging.INFO, - filename='DANet_{}_train_executor.log'.format(args.backbone), - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') - logging.info('DANet') - logging.info(args) - - with fluid.program_guard(train_prog, start_prog): - with fluid.unique_name.guard(): - train_py_reader = fluid.io.PyReader(feed_list=[image, label], - capacity=64, - use_double_buffer=True, - iterable=False) - train_data = cityscapes_train(data_root=data_root, - base_size=args.base_size, - crop_size=args.crop_size, - scale=args.scale, - xmap=True, - batch_size=batch_size, - gpu_num=num) - batch_train_data = paddle.batch(paddle.reader.shuffle( - train_data, buf_size=batch_size * 16), - batch_size=batch_size, - drop_last=True) - train_py_reader.decorate_sample_list_generator(batch_train_data) - - model = get_model(args) - pred, pred2, pred3 = model(image) - train_loss = loss_fn(pred, pred2, pred3, label, num_classes=num_classes) - train_avg_loss = fluid.layers.mean(train_loss) - optimizer = optimizer_setting(args) - optimizer.minimize(train_avg_loss) - # miou不是真实的 - miou, wrong, correct = mean_iou(pred, label, num_classes=num_classes) - - with fluid.program_guard(test_prog, start_prog): - with fluid.unique_name.guard(): - test_py_reader = fluid.io.PyReader(feed_list=[image, label], - capacity=64, - iterable=False, - use_double_buffer=True) - val_data = cityscapes_val(data_root=data_root, - base_size=args.base_size, - crop_size=args.crop_size, - scale=args.scale, - xmap=True) - batch_test_data 
= paddle.batch(val_data, - batch_size=batch_size, - drop_last=True) - test_py_reader.decorate_sample_list_generator(batch_test_data) - - model = get_model(args) - pred, pred2, pred3 = model(image) - test_loss = loss_fn(pred, pred2, pred3, label, num_classes=num_classes) - test_avg_loss = fluid.layers.mean(test_loss) - # miou不是真实的 - miou, wrong, correct = mean_iou(pred, label, num_classes=num_classes) - - place = fluid.CUDAPlace(0) if args.cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(start_prog) - - if args.use_data_parallel and args.cuda: - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_threads = fluid.core.get_cuda_device_count() - exec_strategy.num_iteration_per_drop_scope = 100 - build_strategy = fluid.BuildStrategy() - build_strategy.sync_batch_norm = True - print("sync_batch_norm = True!") - compiled_train_prog = fluid.compiler.CompiledProgram(train_prog).with_data_parallel( - loss_name=train_avg_loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - else: - compiled_train_prog = fluid.compiler.CompiledProgram(train_prog) - - # 加载预训练模型 - if args.load_pretrained_model: - assert os.path.exists(args.save_model), "your input save_model: {} ,but '{}' is not exists".format( - args.save_model, args.save_model) - load_model(args.save_model, exe, program=train_prog) - print('load pretrained model!') - - # 加载最优模型 - if args.load_better_model: - assert os.path.exists(args.save_model), "your input save_model: {} ,but '{}' is not exists".format( - args.save_model, args.save_model) - load_model(args.save_model, exe, program=train_prog) - print('load better model!') - - train_iou_manager = fluid.metrics.Accuracy() - train_avg_loss_manager = fluid.metrics.Accuracy() - test_iou_manager = fluid.metrics.Accuracy() - test_avg_loss_manager = fluid.metrics.Accuracy() - better_miou_train = 0 - better_miou_test = 0 - - train_loss_title = 'Train_loss' - test_loss_title = 'Test_loss' - - train_iou_title = 'Train_mIOU' - 
test_iou_title = 'Test_mIOU' - - plot_loss = Ploter(train_loss_title, test_loss_title) - plot_iou = Ploter(train_iou_title, test_iou_title) - - for epoch in range(epoch_num): - prev_time = datetime.now() - train_avg_loss_manager.reset() - train_iou_manager.reset() - logging.info('training, epoch = {}'.format(epoch + 1)) - train_py_reader.start() - batch_id = 0 - while True: - try: - train_fetch_list = [train_avg_loss, miou, wrong, correct] - train_avg_loss_value, train_iou_value, w, c = exe.run( - program=compiled_train_prog, - fetch_list=train_fetch_list) - - train_iou_manager.update(train_iou_value, weight=int(batch_size * num)) - train_avg_loss_manager.update(train_avg_loss_value, weight=int(batch_size * num)) - batch_train_str = "epoch: {}, batch: {}, train_avg_loss: {:.6f}, " \ - "train_miou: {:.6f}.".format(epoch + 1, - batch_id + 1, - train_avg_loss_value[0], - train_iou_value[0]) - if batch_id % 40 == 0: - logging.info(batch_train_str) - print(batch_train_str) - batch_id += 1 - except fluid.core.EOFException: - train_py_reader.reset() - break - cur_time = datetime.now() - h, remainder = divmod((cur_time - prev_time).seconds, 3600) - m, s = divmod(remainder, 60) - time_str = " Time %02d:%02d:%02d" % (h, m, s) - train_str = "epoch: {}, train_avg_loss: {:.6f}, " \ - "train_miou: {:.6f}.".format(epoch + 1, - train_avg_loss_manager.eval()[0], - train_iou_manager.eval()[0]) - print(train_str + time_str + '\n') - logging.info(train_str + time_str) - plot_loss.append(train_loss_title, epoch, train_avg_loss_manager.eval()[0]) - plot_loss.plot('./DANet_loss_executor.jpg') - plot_iou.append(train_iou_title, epoch, train_iou_manager.eval()[0]) - plot_iou.plot('./DANet_miou_executor.jpg') - - # save_model - if better_miou_train < train_iou_manager.eval()[0]: - shutil.rmtree('./checkpoint/DANet_better_train_{:.4f}'.format(better_miou_train), - ignore_errors=True) - better_miou_train = train_iou_manager.eval()[0] - logging.warning( - 
'-----------train---------------better_train: {:.6f}, epoch: {}, -----------Train model saved successfully!\n'.format( - better_miou_train, epoch + 1)) - save_dir = './checkpoint/DANet_better_train_{:.4f}'.format(better_miou_train) - save_model(save_dir, exe, program=train_prog) - if (epoch + 1) % 5 == 0: - save_dir = './checkpoint/DANet_epoch_train' - save_model(save_dir, exe, program=train_prog) - - # test - test_py_reader.start() - test_iou_manager.reset() - test_avg_loss_manager.reset() - prev_time = datetime.now() - logging.info('testing, epoch = {}'.format(epoch + 1)) - batch_id = 0 - while True: - try: - test_fetch_list = [test_avg_loss, miou, wrong, correct] - test_avg_loss_value, test_iou_value, _, _ = exe.run(program=test_prog, - fetch_list=test_fetch_list) - test_iou_manager.update(test_iou_value, weight=int(batch_size * num)) - test_avg_loss_manager.update(test_avg_loss_value, weight=int(batch_size * num)) - batch_test_str = "epoch: {}, batch: {}, test_avg_loss: {:.6f}, " \ - "test_miou: {:.6f}. 
".format(epoch + 1, - batch_id + 1, - test_avg_loss_value[0], - test_iou_value[0]) - if batch_id % 40 == 0: - logging.info(batch_test_str) - print(batch_test_str) - batch_id += 1 - except fluid.core.EOFException: - test_py_reader.reset() - break - cur_time = datetime.now() - h, remainder = divmod((cur_time - prev_time).seconds, 3600) - m, s = divmod(remainder, 60) - time_str = " Time %02d:%02d:%02d" % (h, m, s) - test_str = "epoch: {}, test_avg_loss: {:.6f}, " \ - "test_miou: {:.6f}.".format(epoch + 1, - test_avg_loss_manager.eval()[0], - test_iou_manager.eval()[0]) - print(test_str + time_str + '\n') - logging.info(test_str + time_str) - plot_loss.append(test_loss_title, epoch, test_avg_loss_manager.eval()[0]) - plot_loss.plot('./DANet_loss_executor.jpg') - plot_iou.append(test_iou_title, epoch, test_iou_manager.eval()[0]) - plot_iou.plot('./DANet_miou_executor.jpg') - - # save_model_infer - if better_miou_test < test_iou_manager.eval()[0]: - shutil.rmtree('./checkpoint/infer/DANet_better_test_{:.4f}'.format(better_miou_test), - ignore_errors=True) - better_miou_test = test_iou_manager.eval()[0] - logging.warning( - '------------test-------------infer better_test: {:.6f}, epoch: {}, ----------------Inference model saved successfully!\n'.format( - better_miou_test, epoch + 1)) - save_dir = './checkpoint/infer/DANet_better_test_{:.4f}'.format(better_miou_test) - # save_model(save_dir, exe, program=test_prog) - fluid.io.save_inference_model(save_dir, [image.name], [pred, pred2, pred3], exe) - print('Inference model saved successfully') - - -if __name__ == '__main__': - options = Options() - args = options.parse() - options.print_args() - main(args) - - - diff --git a/PaddleCV/Research/danet/utils/__init__.py b/PaddleCV/Research/danet/utils/__init__.py deleted file mode 100644 index 8469aa22358578b58a9161761d987815064060f2..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- 
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .base import BaseDataSet -from .cityscapes import CityScapes -from .lr_scheduler import Lr -from .cityscapes_data import * -from .voc import VOC -from .voc_data import * diff --git a/PaddleCV/Research/danet/utils/base.py b/PaddleCV/Research/danet/utils/base.py deleted file mode 100644 index b1528917f870b5965634b3b31f803866036b539f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/base.py +++ /dev/null @@ -1,132 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -import numpy as np -from PIL import Image, ImageOps, ImageFilter, ImageEnhance -import os -import sys - -curPath = os.path.abspath(os.path.dirname(__file__)) -parentPath = os.path.split(curPath)[0] -rootPath = os.path.split(parentPath)[0] -sys.path.append(rootPath) - - -class BaseDataSet(object): - - def __init__(self, root, split, base_size=1024, crop_size=768, scale=True): - self.root = root - support = ['train', 'train_val', 'val', 'test'] - assert split in support, "split= \'{}\' not in {}".format(split, support) - self.split = split - self.crop_size = crop_size # 裁剪大小 - self.base_size = base_size # 图片最短边 - self.scale = scale - self.image_path = None - self.label_path = None - - def sync_transform(self, image, label, aug=True): - crop_size = self.crop_size - if self.scale: - short_size = random.randint(int(self.base_size * 0.75), int(self.base_size * 2.0)) - else: - short_size = self.base_size - - # 随机左右翻转 - if random.random() > 0.5: - image = image.transpose(Image.FLIP_LEFT_RIGHT) - label = label.transpose(Image.FLIP_LEFT_RIGHT) - w, h = image.size - - # 同比例缩放 - if h > w: - out_w = short_size - out_h = int(1.0 * h / w * out_w) - else: - out_h = short_size - out_w = int(1.0 * w / h * out_h) - image = image.resize((out_w, out_h), Image.BILINEAR) - label = label.resize((out_w, out_h), Image.NEAREST) - - # 四周填充 - if short_size < crop_size: - pad_h = crop_size - out_h if out_h < crop_size else 0 - pad_w = crop_size - out_w if out_w < crop_size else 0 - image = ImageOps.expand(image, border=(pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2), - fill=0) - label = ImageOps.expand(label, border=(pad_w // 2, pad_h // 2, pad_w - pad_w // 2, pad_h - pad_h // 2), - fill=255) - - # 随机裁剪 - w, h = image.size - x = random.randint(0, w - crop_size) - y = random.randint(0, h - crop_size) - image = image.crop((x, y, x + crop_size, y + 
crop_size)) - label = label.crop((x, y, x + crop_size, y + crop_size)) - - if aug: - # 高斯模糊,可选 - if random.random() > 0.7: - image = image.filter(ImageFilter.GaussianBlur(radius=random.random())) - - # 可选 - if random.random() > 0.7: - # 随机亮度 - factor = np.random.uniform(0.75, 1.25) - image = ImageEnhance.Brightness(image).enhance(factor) - - # 颜色抖动 - factor = np.random.uniform(0.75, 1.25) - image = ImageEnhance.Color(image).enhance(factor) - - # 随机对比度 - factor = np.random.uniform(0.75, 1.25) - image = ImageEnhance.Contrast(image).enhance(factor) - - # 随机锐度 - factor = np.random.uniform(0.75, 1.25) - image = ImageEnhance.Sharpness(image).enhance(factor) - return image, label - - def sync_val_transform(self, image, label): - crop_size = self.crop_size - short_size = self.base_size - - w, h = image.size - - # 同比例缩放 - if h > w: - out_w = short_size - out_h = int(1.0 * h / w * out_w) - else: - out_h = short_size - out_w = int(1.0 * w / h * out_h) - image = image.resize((out_w, out_h), Image.BILINEAR) - label = label.resize((out_w, out_h), Image.NEAREST) - - # 中心裁剪 - w, h = image.size - x1 = int(round((w - crop_size) / 2.)) - y1 = int(round((h - crop_size) / 2.)) - image = image.crop((x1, y1, x1 + crop_size, y1 + crop_size)) - label = label.crop((x1, y1, x1 + crop_size, y1 + crop_size)) - return image, label - - def eval(self, image): - pass diff --git a/PaddleCV/Research/danet/utils/cityscapes.py b/PaddleCV/Research/danet/utils/cityscapes.py deleted file mode 100644 index 5c7ee431c11c8d9eb0cdd9d3bd976156d5883766..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/cityscapes.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from utils.base import BaseDataSet - - -class CityScapes(BaseDataSet): - """prepare cityscapes path_pairs""" - - BASE_DIR = 'cityscapes' - NUM_CLASS = 19 - - def __init__(self, root='./dataset', split='train', **kwargs): - super(CityScapes, self).__init__(root, split, **kwargs) - if os.sep == '\\': # windows - root = root.replace('/', '\\') - - root = os.path.join(root, self.BASE_DIR) - assert os.path.exists(root), "please download cityscapes data_set, put in dataset(dir),or check root" - self.image_path, self.label_path = self._get_cityscapes_pairs(root, split) - assert len(self.image_path) == len(self.label_path), "please check image_length = label_length" - self.print_param() - - def print_param(self): # 用于核对当前数据集的信息 - print('INFO: dataset_root: {}, split: {}, ' - 'base_size: {}, crop_size: {}, scale: {}, ' - 'image_length: {}, label_length: {}'.format(self.root, self.split, self.base_size, - self.crop_size, self.scale, len(self.image_path), - len(self.label_path))) - - @staticmethod - def _get_cityscapes_pairs(root, split): - - def get_pairs(root, file_image, file_label): - file_image = os.path.join(root, file_image) - file_label = os.path.join(root, file_label) - with open(file_image, 'r') as f: - file_list_image = f.read().split() - with open(file_label, 'r') as f: - file_list_label = f.read().split() - if os.sep == '\\': # for windows - image_path = [os.path.join(root, x.replace('/', '\\')) for x in file_list_image] - 
label_path = [os.path.join(root, x.replace('/', '\\')) for x in file_list_label] - else: - image_path = [os.path.join(root, x) for x in file_list_image] - label_path = [os.path.join(root, x) for x in file_list_label] - return image_path, label_path - - if split == 'train': - image_path, label_path = get_pairs(root, 'trainImages.txt', 'trainLabels.txt') - elif split == 'val': - image_path, label_path = get_pairs(root, 'valImages.txt', 'valLabels.txt') - elif split == 'test': - image_path, label_path = get_pairs(root, 'testImages.txt', 'testLabels.txt') # 返回文件路径,test_label并不存在 - else: # 'train_val' - image_path1, label_path1 = get_pairs(root, 'trainImages.txt', 'trainLabels.txt') - image_path2, label_path2 = get_pairs(root, 'valImages.txt', 'valLabels.txt') - image_path, label_path = image_path1+image_path2, label_path1+label_path2 - return image_path, label_path - - def get_path_pairs(self): - return self.image_path, self.label_path - diff --git a/PaddleCV/Research/danet/utils/cityscapes_data.py b/PaddleCV/Research/danet/utils/cityscapes_data.py deleted file mode 100644 index e96534cf31d5f5a2e226435d527515bef7bd8f03..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/cityscapes_data.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -import paddle -import numpy as np - -from PIL import Image - -from utils.cityscapes import CityScapes - -__all__ = ['cityscapes_train', 'cityscapes_val', 'cityscapes_train_val', 'cityscapes_test'] - -# globals -data_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) -data_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) - - -def mapper_train(sample): - image_path, label_path, city = sample - image = Image.open(image_path, mode='r').convert('RGB') - label = Image.open(label_path, mode='r') - - image, label = city.sync_transform(image, label) - image_array = np.array(image) # HWC - label_array = np.array(label) # HW - - image_array = image_array.transpose((2, 0, 1)) # CHW - image_array = image_array / 255.0 - image_array = (image_array - data_mean) / data_std - image_array = image_array.astype('float32') - label_array = label_array.astype('int64') - return image_array, label_array - - -def mapper_val(sample): - image_path, label_path, city = sample - image = Image.open(image_path, mode='r').convert('RGB') - label = Image.open(label_path, mode='r') - - image, label = city.sync_val_transform(image, label) - image_array = np.array(image) # HWC - label_array = np.array(label) # HW - - image_array = image_array.transpose((2, 0, 1)) # CHW - image_array = image_array / 255.0 - image_array = (image_array - data_mean) / data_std - image_array = image_array.astype('float32') - label_array = label_array.astype('int64') - return image_array, label_array - - -def mapper_test(sample): - image_path, label_path = sample # label is path - image = Image.open(image_path, mode='r').convert('RGB') - image_array = image - return image_array, label_path # image is a picture, label is path - - -# root, base_size, crop_size; gpu_num必须设置,否则syncBN会出现某些卡没有数据的情况 -def cityscapes_train(data_root='./dataset', base_size=1024, crop_size=768, scale=True, 
xmap=True, batch_size=1, gpu_num=1): - city = CityScapes(root=data_root, split='train', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = city.get_path_pairs() - - def reader(): - if len(image_path) % (batch_size * gpu_num) != 0: - length = (len(image_path) // (batch_size * gpu_num)) * (batch_size * gpu_num) - else: - length = len(image_path) - for i in range(length): - if i == 0: - cc = list(zip(image_path, label_path)) - random.shuffle(cc) - image_path[:], label_path[:] = zip(*cc) - yield image_path[i], label_path[i], city - if xmap: - return paddle.reader.xmap_readers(mapper_train, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_train, reader) - - -def cityscapes_val(data_root='./dataset', base_size=1024, crop_size=768, scale=True, xmap=True): - city = CityScapes(root=data_root, split='val', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = city.get_path_pairs() - - def reader(): - for i in range(len(image_path)): - yield image_path[i], label_path[i], city - - if xmap: - return paddle.reader.xmap_readers(mapper_val, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_val, reader) - - -def cityscapes_train_val(data_root='./dataset', base_size=1024, crop_size=768, scale=True, xmap=True, batch_size=1, gpu_num=1): - city = CityScapes(root=data_root, split='train_val', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = city.get_path_pairs() - - def reader(): - if len(image_path) % (batch_size * gpu_num) != 0: - length = (len(image_path) // (batch_size * gpu_num)) * (batch_size * gpu_num) - else: - length = len(image_path) - for i in range(length): - if i == 0: - cc = list(zip(image_path, label_path)) - random.shuffle(cc) - image_path[:], label_path[:] = zip(*cc) - yield image_path[i], label_path[i], city - - if xmap: - return paddle.reader.xmap_readers(mapper_train, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_train, reader) 
- - -def cityscapes_test(split='test', base_size=2048, crop_size=1024, scale=True, xmap=True): - # 实际未使用base_size, crop_size, scale - city = CityScapes(split=split, base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = city.get_path_pairs() - - def reader(): - for i in range(len(image_path)): - yield image_path[i], label_path[i] - if xmap: - return paddle.reader.xmap_readers(mapper_test, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_test, reader) diff --git a/PaddleCV/Research/danet/utils/lr_scheduler.py b/PaddleCV/Research/danet/utils/lr_scheduler.py deleted file mode 100644 index 4ce8316a43536aef9414ca5e40a4e8a5ccb63aba..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/lr_scheduler.py +++ /dev/null @@ -1,152 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -import math - - -class Lr(object): - """ - 示例:使用poly策略, 有热身, - lr_scheduler = Lr(lr_policy='poly', base_lr=0.003, epoch_nums=200, step_per_epoch=20, - warm_up=True, warmup_epoch=11) - lr = lr_scheduler.get_lr() - - 示例:使用cosine策略, 有热身, - lr_scheduler = Lr(lr_policy='cosine', base_lr=0.003, epoch_nums=200, step_per_epoch=20, - warm_up=True, warmup_epoch=11) - lr = lr_scheduler.get_lr() - - 示例:使用piecewise策略, 有热身,必须设置边界(decay_epoch list), gamma系数默认0.1 - lr_scheduler = Lr(lr_policy='piecewise', base_lr=0.003, epoch_nums=200, step_per_epoch=20, - warm_up=True, warmup_epoch=11, decay_epoch=[50], gamma=0.1) - lr = lr_scheduler.get_lr() - """ - def __init__(self, lr_policy, base_lr, epoch_nums, step_per_epoch, - power=0.9, end_lr=0.0, gamma=0.1, decay_epoch=[], - warm_up=False, warmup_epoch=0): - support_lr_policy = ['poly', 'piecewise', 'cosine'] - assert lr_policy in support_lr_policy, "Only support poly, piecewise, cosine" - self.lr_policy = lr_policy # 学习率衰减策略 : str(`cosine`, `poly`, `piecewise`) - - assert base_lr >= 0, "Start learning rate should greater than 0" - self.base_lr = base_lr # 基础学习率: float - - assert end_lr >= 0, "End learning rate should greater than 0" - self.end_lr = end_lr # 学习率终点: float - - assert epoch_nums, "epoch_nums should greater than 0" - assert step_per_epoch, "step_per_epoch should greater than 0" - - self.epoch_nums = epoch_nums # epoch数: int - self.step_per_epoch = step_per_epoch # 每个epoch的迭代数: int - self.total_step = epoch_nums * step_per_epoch # 总的迭代数 :auto - self.power = power # 指数: float - self.gamma = gamma # 分段衰减的系数: float - self.decay_epoch = decay_epoch # 分段衰减的epoch: list - if self.lr_policy == 'piecewise': - assert len(decay_epoch) >= 1, "use piecewise policy, should set decay_epoch list" - self.warm_up = warm_up # 是否热身:bool - if self.warm_up: - assert warmup_epoch, "warmup_epoch should 
greater than 0" - assert warmup_epoch < epoch_nums, "warmup_epoch should less than epoch_nums" - self.warmup_epoch = warmup_epoch - self.warmup_steps = warmup_epoch * step_per_epoch # 热身steps:int(epoch*step_per_epoch) - - def _piecewise_decay(self): - gamma = self.gamma - bd = [self.step_per_epoch * e for e in self.decay_epoch] - lr = [self.base_lr * (gamma ** i) for i in range(len(bd) + 1)] - decayed_lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr) - return decayed_lr - - def _poly_decay(self): - decayed_lr = fluid.layers.polynomial_decay( - self.base_lr, self.total_step, end_learning_rate=self.end_lr, power=self.power) - return decayed_lr - - def _cosine_decay(self): - decayed_lr = fluid.layers.cosine_decay( - self.base_lr, self.step_per_epoch, self.epoch_nums) - return decayed_lr - - def get_lr(self): - if self.lr_policy.lower() == 'poly': - if self.warm_up: - warm_up_end_lr = (self.base_lr - self.end_lr) * pow( - (1 - self.warmup_steps / self.total_step), self.power) + self.end_lr - print('poly warm_up_end_lr:', warm_up_end_lr) - decayed_lr = fluid.layers.linear_lr_warmup(self._poly_decay(), - warmup_steps=self.warmup_steps, - start_lr=0.0, - end_lr=warm_up_end_lr) - else: - decayed_lr = self._poly_decay() - elif self.lr_policy.lower() == 'piecewise': - if self.warm_up: - assert self.warmup_steps < self.decay_epoch[0] * self.step_per_epoch - warm_up_end_lr = self.base_lr - print('piecewise warm_up_end_lr:', warm_up_end_lr) - decayed_lr = fluid.layers.linear_lr_warmup(self._piecewise_decay(), - warmup_steps=self.warmup_steps, - start_lr=0.0, - end_lr=warm_up_end_lr) - else: - decayed_lr = self._piecewise_decay() - elif self.lr_policy.lower() == 'cosine': - if self.warm_up: - warm_up_end_lr = self.base_lr*0.5*(math.cos(self.warmup_epoch*math.pi/self.epoch_nums)+1) - print('cosine warm_up_end_lr:', warm_up_end_lr) - decayed_lr = fluid.layers.linear_lr_warmup(self._cosine_decay(), - warmup_steps=self.warmup_steps, - start_lr=0.0, - end_lr=warm_up_end_lr) 
- else: - decayed_lr = self._cosine_decay() - else: - raise Exception( - "unsupport learning decay policy! only support poly,piecewise,cosine" - ) - return decayed_lr - - -if __name__ == '__main__': - epoch_nums = 200 - step_per_epoch = 180 - base_lr = 0.003 - warmup_epoch = 5 # 热身数 - lr_scheduler = Lr(lr_policy='poly', base_lr=base_lr, epoch_nums=epoch_nums, step_per_epoch=step_per_epoch, - warm_up=True, warmup_epoch=warmup_epoch, decay_epoch=[50]) - lr = lr_scheduler.get_lr() - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - lr_list = [] - for epoch in range(epoch_nums): - for i in range(step_per_epoch): - x = exe.run(fluid.default_main_program(), - fetch_list=[lr]) - lr_list.append(x[0]) - # print(x[0]) - # 绘图 - from matplotlib import pyplot as plt - plt.plot(range(epoch_nums*step_per_epoch), lr_list) - plt.xlabel('step') - plt.ylabel('lr') - plt.show() - diff --git a/PaddleCV/Research/danet/utils/voc.py b/PaddleCV/Research/danet/utils/voc.py deleted file mode 100644 index 01021ec01f6e0e96df65e6af50863db96e400eef..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/voc.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from utils.base import BaseDataSet - - -class VOC(BaseDataSet): - """prepare pascalVOC path_pairs""" - BASE_DIR = 'VOC2012_SBD' - NUM_CLASS = 21 - - def __init__(self, root='../dataset', split='train', **kwargs): - super(VOC, self).__init__(root, split, **kwargs) - if os.sep == '\\': # windows - root = root.replace('/', '\\') - - root = os.path.join(root, self.BASE_DIR) - assert os.path.exists(root), "please download voc2012 data_set, put in dataset(dir)" - if split == 'test': - self.image_path = self._get_cityscapes_pairs(root, split) - else: - self.image_path, self.label_path = self._get_cityscapes_pairs(root, split) - if self.label_path is None: - pass - else: - assert len(self.image_path) == len(self.label_path), "please check image_length = label_length" - self.print_param() - - def print_param(self): # 用于核对当前数据集的信息 - if self.label_path is None: - print('INFO: dataset_root: {}, split: {}, ' - 'base_size: {}, crop_size: {}, scale: {}, ' - 'image_length: {}'.format(self.root, self.split, self.base_size, - self.crop_size, self.scale, len(self.image_path))) - else: - print('INFO: dataset_root: {}, split: {}, ' - 'base_size: {}, crop_size: {}, scale: {}, ' - 'image_length: {}, label_length: {}'.format(self.root, self.split, self.base_size, - self.crop_size, self.scale, len(self.image_path), - len(self.label_path))) - - @staticmethod - def _get_cityscapes_pairs(root, split): - - def get_pairs(root, file): - if file.find('test') == -1: - file = os.path.join(root, file) - with open(file, 'r') as f: - file_list = f.readlines() - if os.sep == '\\': # for windows - image_path = [ - os.path.join(root, 'pascal', 'VOC2012', x.split()[0][1:].replace('/', '\\').replace('\n', '')) - for x in file_list] - label_path = [os.path.join(root, 'pascal', 'VOC2012', x.split()[1][1:].replace('/', '\\')) for x in - file_list] - else: - image_path = 
[os.path.join(root, 'pascal', 'VOC2012', x.split()[0][1:]) for x in file_list] - label_path = [os.path.join(root, 'pascal', 'VOC2012', x.split()[1][1:]) for x in file_list] - return image_path, label_path - else: - file = os.path.join(root, file) - with open(file, 'r') as f: - file_list = f.readlines() - if os.sep == '\\': # for windows - image_path = [ - os.path.join(root, 'pascal', 'VOC2012', x.split()[0][1:].replace('/', '\\').replace('\n', '')) - for x in file_list] - else: - image_path = [os.path.join(root, 'pascal', 'VOC2012', x.split()[0][1:]) for x in file_list] - return image_path - - if split == 'train': - image_path, label_path = get_pairs(root, 'list/train_aug.txt') - elif split == 'val': - image_path, label_path = get_pairs(root, 'list/val.txt') - elif split == 'test': - image_path = get_pairs(root, 'list/test.txt') # 返回文件路径,test_label并不存在 - return image_path - else: # 'train_val' - image_path, label_path = get_pairs(root, 'list/trainval_aug.txt') - return image_path, label_path - - def get_path_pairs(self): - return self.image_path, self.label_path diff --git a/PaddleCV/Research/danet/utils/voc_data.py b/PaddleCV/Research/danet/utils/voc_data.py deleted file mode 100644 index d2dba4f9135dc80fd9c015ea7a7c3bde1af5b0e1..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/danet/utils/voc_data.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -import paddle -import numpy as np - -from PIL import Image - -from utils.voc import VOC - -__all__ = ['voc_train', 'voc_val', 'voc_train_val', 'voc_test'] - -# globals -data_mean = np.array([0.485, 0.456, 0.406]).reshape(3, 1, 1) -data_std = np.array([0.229, 0.224, 0.225]).reshape(3, 1, 1) - - -def mapper_train(sample): - image_path, label_path, voc = sample - image = Image.open(image_path, mode='r').convert('RGB') - label = Image.open(label_path, mode='r') - - image, label = voc.sync_transform(image, label) - image_array = np.array(image) # HWC - label_array = np.array(label) # HW - - image_array = image_array.transpose((2, 0, 1)) # CHW - image_array = image_array / 255.0 - image_array = (image_array - data_mean) / data_std - image_array = image_array.astype('float32') - label_array = label_array.astype('int64') - return image_array, label_array - - -def mapper_val(sample): - image_path, label_path, city = sample - image = Image.open(image_path, mode='r').convert('RGB') - label = Image.open(label_path, mode='r') - - image, label = city.sync_val_transform(image, label) - image_array = np.array(image) - label_array = np.array(label) - - image_array = image_array.transpose((2, 0, 1)) - image_array = image_array / 255.0 - image_array = (image_array - data_mean) / data_std - image_array = image_array.astype('float32') - label_array = label_array.astype('int64') - return image_array, label_array - - -def mapper_test(sample): - image_path, label_path = sample # label is path - image = Image.open(image_path, mode='r').convert('RGB') - image_array = image - return image_array, label_path # label is path - - -# 已完成, 引用时记得传入参数,root, base_size, crop_size等, gpu_num必须设置,否则syncBN会出现某些卡没有数据的情况 -def voc_train(data_root='../dataset', base_size=768, 
crop_size=576, scale=True, xmap=True, batch_size=1, gpu_num=1): - voc = VOC(root=data_root, split='train', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = voc.get_path_pairs() - - def reader(): - if len(image_path) % (batch_size * gpu_num) != 0: - length = (len(image_path) // (batch_size * gpu_num)) * (batch_size * gpu_num) - else: - length = len(image_path) - for i in range(length): - if i == 0: - cc = list(zip(image_path, label_path)) - random.shuffle(cc) - image_path[:], label_path[:] = zip(*cc) - yield image_path[i], label_path[i], voc - if xmap: - return paddle.reader.xmap_readers(mapper_train, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_train, reader) - - -def voc_val(data_root='../dataset', base_size=768, crop_size=576, scale=True, xmap=True): - voc = VOC(root=data_root, split='val', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = voc.get_path_pairs() - - def reader(): - for i in range(len(image_path)): - yield image_path[i], label_path[i], voc - - if xmap: - return paddle.reader.xmap_readers(mapper_val, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_val, reader) - - -def voc_train_val(data_root='./dataset', base_size=768, crop_size=576, scale=True, xmap=True, batch_size=1, gpu_num=1): - voc = VOC(root=data_root, split='train_val', base_size=base_size, crop_size=crop_size, scale=scale) - image_path, label_path = voc.get_path_pairs() - - def reader(): - if len(image_path) % (batch_size * gpu_num) != 0: - length = (len(image_path) // (batch_size * gpu_num)) * (batch_size * gpu_num) - else: - length = len(image_path) - for i in range(length): - if i == 0: - cc = list(zip(image_path, label_path)) - random.shuffle(cc) - image_path[:], label_path[:] = zip(*cc) - yield image_path[i], label_path[i] - - if xmap: - return paddle.reader.xmap_readers(mapper_train, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_train, reader) - - -def 
voc_test(split='test', base_size=2048, crop_size=1024, scale=True, xmap=True): - # 实际未使用base_size, crop_size, scale - voc = VOC(split=split, base_size=base_size, crop_size=crop_size, scale=scale) - image_path = voc.get_path_pairs() - - def reader(): - for i in range(len(image_path[:1])): - yield image_path[i], image_path[i] - if xmap: - return paddle.reader.xmap_readers(mapper_test, reader, 4, 32) - else: - return paddle.reader.map_readers(mapper_test, reader) diff --git a/PaddleCV/Research/inpainting-lbam/ActivationFunction.py b/PaddleCV/Research/inpainting-lbam/ActivationFunction.py deleted file mode 100644 index b2db1bd9ebb7a5ef05d5a99ffc9195e481ee55e4..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/ActivationFunction.py +++ /dev/null @@ -1,53 +0,0 @@ -import numpy as np - -# asymmetric gaussian shaped activation function g_A -import paddle.fluid as fluid - - -def GaussActivation(input, a, mu, sigma1, sigma2): - initializer = fluid.initializer.ConstantInitializer(value=a) - a = fluid.layers.create_parameter( - shape=[1], dtype='float32', default_initializer=initializer) - a = fluid.layers.clip(a, min=1.01, max=6.0) - - initializer = fluid.initializer.ConstantInitializer(value=mu) - mu = fluid.layers.create_parameter( - shape=[1], dtype='float32', default_initializer=initializer) - mu = fluid.layers.clip(mu, min=0.1, max=3.0) - - initializer = fluid.initializer.ConstantInitializer(value=sigma1) - sigma1 = fluid.layers.create_parameter( - shape=[1], dtype='float32', default_initializer=initializer) - sigma1 = fluid.layers.clip(sigma1, min=1.0, max=2.0) - - initializer = fluid.initializer.ConstantInitializer(value=sigma2) - sigma2 = fluid.layers.create_parameter( - shape=[1], dtype='float32', default_initializer=initializer) - sigma2 = fluid.layers.clip(sigma2, min=1.0, max=2.0) - - lowerThanMu = fluid.layers.less_than(input, mu) - largerThanMu = fluid.layers.logical_not(lowerThanMu) - - diff_mu = (input - mu) - leftValuesActiv = 
fluid.layers.exp(-1 * fluid.layers.square(diff_mu) * - sigma1) * a - leftValuesActiv = leftValuesActiv * lowerThanMu - - rightValueActiv = 1 + fluid.layers.exp(-1 * fluid.layers.square(diff_mu) * - sigma2) * (a - 1) - rightValueActiv = rightValueActiv * largerThanMu - - output = leftValuesActiv + rightValueActiv - - return output - - -def MaskUpdate(input, alpha): - initializer = fluid.initializer.ConstantInitializer(value=alpha) - alpha_t = fluid.layers.create_parameter( - shape=[1], dtype='float32', default_initializer=initializer) - - alpha_t = fluid.layers.clip(alpha_t, min=0.6, max=0.8) - out = fluid.layers.relu(input) - out = fluid.layers.elementwise_pow(out, alpha_t) - return out diff --git a/PaddleCV/Research/inpainting-lbam/LBAMModel.py b/PaddleCV/Research/inpainting-lbam/LBAMModel.py deleted file mode 100644 index 101db2befaada46a8aa34277410391d9f3504813..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/LBAMModel.py +++ /dev/null @@ -1,62 +0,0 @@ -import paddle.fluid as fluid -from forwardAttentionLayer import ForwardAttention -from reverseAttentionLayer import ReverseAttention, ReverseMaskConv - - -class LBAMModel(): - def __init__(self, num_filters): - self.num_filters = num_filters - - def net(self, inputImgs, masks): - ef1, mu1, skipConnect1, forwardMap1 = ForwardAttention( - inputImgs, masks, 64, bn=False) - ef2, mu2, skipConnect2, forwardMap2 = ForwardAttention(ef1, mu1, 128) - ef3, mu3, skipConnect3, forwardMap3 = ForwardAttention(ef2, mu2, 256) - ef4, mu4, skipConnect4, forwardMap4 = ForwardAttention(ef3, mu3, 512) - - ef5, mu5, skipConnect5, forwardMap5 = ForwardAttention(ef4, mu4, 512) - ef6, mu6, skipConnect6, forwardMap6 = ForwardAttention(ef5, mu5, 512) - ef7, _, _, _ = ForwardAttention(ef6, mu6, 512) - - reverseMap1, revMu1 = ReverseMaskConv(1 - masks, 64) - reverseMap2, revMu2 = ReverseMaskConv(revMu1, 128) - reverseMap3, revMu3 = ReverseMaskConv(revMu2, 256) - reverseMap4, revMu4 = ReverseMaskConv(revMu3, 
512) - reverseMap5, revMu5 = ReverseMaskConv(revMu4, 512) - reverseMap6, _ = ReverseMaskConv(revMu5, 512) - - concatMap6 = fluid.layers.concat([forwardMap6, reverseMap6], axis=1) - dcFeatures1 = ReverseAttention(skipConnect6, ef7, concatMap6, 512) - - concatMap5 = fluid.layers.concat([forwardMap5, reverseMap5], axis=1) - dcFeatures2 = ReverseAttention(skipConnect5, dcFeatures1, concatMap5, - 512) - - concatMap4 = fluid.layers.concat([forwardMap4, reverseMap4], axis=1) - dcFeatures3 = ReverseAttention(skipConnect4, dcFeatures2, concatMap4, - 512) - - concatMap3 = fluid.layers.concat([forwardMap3, reverseMap3], axis=1) - dcFeatures4 = ReverseAttention(skipConnect3, dcFeatures3, concatMap3, - 256) - - concatMap2 = fluid.layers.concat([forwardMap2, reverseMap2], axis=1) - dcFeatures5 = ReverseAttention(skipConnect2, dcFeatures4, concatMap2, - 128) - - concatMap1 = fluid.layers.concat([forwardMap1, reverseMap1], axis=1) - dcFeatures6 = ReverseAttention(skipConnect1, dcFeatures5, concatMap1, - 64) - - dcFeatures7 = fluid.layers.conv2d_transpose( - input=dcFeatures6, - num_filters=self.num_filters, - filter_size=4, - stride=2, - padding=1, - act=None, - bias_attr=False) - - output = fluid.layers.abs(fluid.layers.tanh(dcFeatures7)) - - return output diff --git a/PaddleCV/Research/inpainting-lbam/README.md b/PaddleCV/Research/inpainting-lbam/README.md deleted file mode 100644 index c9d4784c6335ef341c528e319e4d74a115c427fe..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Image Inpainting with Learnable Bidirectional Attention Maps. -The PaddlePaddle implementation of Image Inpainting with Learnable Bidirectional Attention Maps in ICCV 2019, by Chaohao Xie, Shaohui Liu, Chao Li, Ming-Ming Cheng, Wangmeng Zuo, Xiao Liu, Shilei Wen, Errui Ding.\ - - -## 1. Requirements. - -PaddlePaddle version == 1.6.\ -Python version == 3.6.\ -NCCL for multiple GPUs. - -## 2. Usage. 
- -Download the pretrained models by , password: apfo.\ -Run the test script. -``` -sh test.sh -``` - -``` -mkdir -p results/paris - -FLAGS_fraction_of_gpu_memory_to_use=0.1 \ -CUDA_VISIBLE_DEVICES=0 \ -FLAGS_eager_delete_tensor_gb=0.0 \ -FLAGS_fast_eager_deletion_mode=1 \ -python -u test.py \ ---pretrained_model 'pretrained_models/LBAM_ParisStreetView' \ # path to the pretrained model ---imgfn 'imgs/paris/pic.png' \ # input picture. ---maskfn 'imgs/paris/mask.png' \ # mask. ---resultfn 'results/paris' # folder for the result. -``` -Input picture:\ -![avatar](imgs/paris/pic.png) - -Input mask:\ -![avatar](imgs/paris/mask.png) - -Inpainting result:\ -![avatar](results/paris/pic.png) diff --git a/PaddleCV/Research/inpainting-lbam/forwardAttentionLayer.py b/PaddleCV/Research/inpainting-lbam/forwardAttentionLayer.py deleted file mode 100644 index 05ea3938e38e91208ed1bec783edd7b222517441..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/forwardAttentionLayer.py +++ /dev/null @@ -1,98 +0,0 @@ -import paddle.fluid as fluid -from ActivationFunction import GaussActivation -from ActivationFunction import MaskUpdate - -# learnable forward attention conv layer - - -def ForwardAttentionLayer(inputFeatures, - inputMasks, - num_filters, - kSize, - stride, - padding, - bias=False): - convFeatures = fluid.layers.conv2d( - input=inputFeatures, - num_filters=num_filters, - filter_size=kSize, - stride=stride, - padding=padding, - act=None, - bias_attr=bias) - maskFeatures = fluid.layers.conv2d( - input=inputMasks, - num_filters=num_filters, - filter_size=kSize, - stride=stride, - padding=padding, - act=None, - bias_attr=bias) - - maskActiv = GaussActivation(maskFeatures, 1.1, 2.0, 1.0, 1.0) - convOut = convFeatures * maskActiv - - maskUpdate = MaskUpdate(maskFeatures, 0.8) - - return convOut, maskUpdate, convFeatures, maskActiv - - -def ForwardAttention(inputFeatures, - inputMasks, - num_filters, - bn=True, - sample='down-4', - activ='leaky', - 
convBias=False): - if sample == 'down-4': - kSize = 4 - stride = 2 - padding = 1 - elif sample == 'down-5': - kSize = 5 - stride = 2 - padding = 2 - - elif sample == 'down-7': - kSize = 7 - stride = 2 - padding = 3 - elif sample == 'down-3': - kSize = 3 - stride = 2 - padding = 1 - else: - kSize = 3 - stride = 1 - padding = 1 - features, maskUpdated, convPreF, maskActiv = ForwardAttentionLayer( - inputFeatures, - inputMasks, - num_filters, - kSize, - stride, - padding, - bias=convBias) - - if bn: - features = fluid.layers.batch_norm(input=features) - - if activ == 'leaky': - features = fluid.layers.leaky_relu(features, alpha=0.2) - - elif activ == 'relu': - features = fluid.layers.relu(features) - - elif activ == 'sigmoid': - features = fluid.layers.sigmoid(features) - - elif activ == 'tanh': - features = fluid.layers.tanh(features) - - elif activ == 'prelu': - features = fluid.layers.prelu(features, 'all') - - else: - pass - - return features, maskUpdated, convPreF, maskActiv diff --git a/PaddleCV/Research/inpainting-lbam/imgs/paris/mask.png b/PaddleCV/Research/inpainting-lbam/imgs/paris/mask.png deleted file mode 100644 index a4e69590139b7743196bc07b89a1ef481a711178..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/inpainting-lbam/imgs/paris/mask.png and /dev/null differ diff --git a/PaddleCV/Research/inpainting-lbam/imgs/paris/pic.png b/PaddleCV/Research/inpainting-lbam/imgs/paris/pic.png deleted file mode 100644 index b949fb62a16164eec217148006bbb3a87aa0b48d..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/inpainting-lbam/imgs/paris/pic.png and /dev/null differ diff --git a/PaddleCV/Research/inpainting-lbam/results/paris/pic.png b/PaddleCV/Research/inpainting-lbam/results/paris/pic.png deleted file mode 100644 index 683a0cc4c5e2e9ca1f14a907dd585b4b2c82cc61..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/inpainting-lbam/results/paris/pic.png and /dev/null differ diff --git 
a/PaddleCV/Research/inpainting-lbam/reverseAttentionLayer.py b/PaddleCV/Research/inpainting-lbam/reverseAttentionLayer.py deleted file mode 100644 index 4397a2b3fa0a873cdf74ff87920685dd3098efe1..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/reverseAttentionLayer.py +++ /dev/null @@ -1,66 +0,0 @@ -import paddle.fluid as fluid -from ActivationFunction import GaussActivation -from ActivationFunction import MaskUpdate - - -def ReverseMaskConv(inputMasks, - num_filters, - kSize=4, - stride=2, - padding=1, - convBias=False): - maskFeatures = fluid.layers.conv2d( - input=inputMasks, - num_filters=num_filters, - filter_size=kSize, - stride=stride, - padding=padding, - act=None, - bias_attr=convBias) - - maskActiv = GaussActivation(maskFeatures, 1.1, 2.0, 1.0, 1.0) - - maskUpdate = MaskUpdate(maskFeatures, 0.8) - - return maskActiv, maskUpdate - - - - -def ReverseAttention(ecFeaturesSkip, dcFeatures, maskFeaturesForAttention, num_filters, bn=True, activ='leaky', \ - kSize=4, stride=2, padding=1, outPadding=0,convBias=False): - - nextDcFeatures = fluid.layers.conv2d_transpose( - input=dcFeatures, - num_filters=num_filters, - filter_size=kSize, - stride=stride, - padding=padding, - act=None, - bias_attr=convBias) - concatFeatures = fluid.layers.concat( - [ecFeaturesSkip, nextDcFeatures], axis=1) - outputFeatures = concatFeatures * maskFeaturesForAttention - - if bn: - outputFeatures = fluid.layers.batch_norm(input=outputFeatures) - - if activ == 'leaky': - outputFeatures = fluid.layers.leaky_relu(outputFeatures, alpha=0.2) - - elif activ == 'relu': - outputFeatures = fluid.layers.relu(outputFeatures) - - elif activ == 'sigmoid': - outputFeatures = fluid.layers.sigmoid(outputFeatures) - - elif activ == 'tanh': - outputFeatures = fluid.layers.tanh(outputFeatures) - - elif activ == 'prelu': - outputFeatures = fluid.layers.prelu(outputFeatures, 'all') - - else: - pass - - return outputFeatures diff --git 
a/PaddleCV/Research/inpainting-lbam/test.py b/PaddleCV/Research/inpainting-lbam/test.py deleted file mode 100644 index 9b13a43be541dbc621eb35955c73ae8bb64e71b1..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/test.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -import sys -import paddle -import paddle.fluid as fluid -import cv2 -import numpy as np -import glob -from paddle.fluid.framework import Parameter - -from LBAMModel import LBAMModel - -import functools -import argparse -from utility import add_arguments, print_arguments -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('imgfn', str, None, "image file name.") -add_arg('maskfn', str, None, "mask file name.") -add_arg('resultfn', str, None, "result file name.") -add_arg('pretrained_model', str, None, "pretrained_model") - -def test(): - args = parser.parse_args() - print_arguments(args) - - pretrained_model = args.pretrained_model - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=pretrained_model, executor=exe, model_filename='model', params_filename='params') - - imgfn = args.imgfn - maskfn = args.maskfn - resultfn = args.resultfn - if not os.path.exists(args.resultfn): - os.makedirs(args.resultfn) - - imglist = sorted(glob.glob(imgfn)) - masklist = sorted(glob.glob(maskfn)) - - for imgfn_,maskfn_ in (list(zip(imglist,masklist))): - print(imgfn_) - print(maskfn_) - print('') - - img = cv2.imread(imgfn_) - mask = cv2.imread(maskfn_) - - img = img.transpose(2, 0, 1)[::-1] - img = img.astype(np.float32)/255.0 - mask = mask.transpose(2, 0, 1) - mask = mask.astype(np.float32)/255.0 - - threshhold = 0.5 - mask = (mask >= threshhold).astype(np.float32) - - # CHW RGB - mask = 1 - mask - img = img * mask - - img0 = img - img = np.concatenate((img, 
mask[0:1]), axis=0) - - result = exe.run(inference_program,feed={feed_target_names[0]: img[np.newaxis,:], feed_target_names[1]: mask[np.newaxis,:]}, fetch_list=fetch_targets) - - outimg = result[0][0] - outimg = outimg * (1-mask) + img0 * mask - - # BGR HWC - outimg = outimg[::-1].transpose(1, 2, 0)*255.0 - - - outfn = os.path.join(args.resultfn, os.path.basename(imgfn_)) - cv2.imwrite(outfn,outimg) - - -if __name__ == '__main__': - test() diff --git a/PaddleCV/Research/inpainting-lbam/test.sh b/PaddleCV/Research/inpainting-lbam/test.sh deleted file mode 100644 index f7ba89378cb70ad7183683b1a78f75b85709cf5a..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/test.sh +++ /dev/null @@ -1,11 +0,0 @@ -mkdir -p results/paris - -FLAGS_fraction_of_gpu_memory_to_use=0.1 \ -CUDA_VISIBLE_DEVICES=0 \ -FLAGS_eager_delete_tensor_gb=0.0 \ -FLAGS_fast_eager_deletion_mode=1 \ -python -u test.py \ ---pretrained_model 'pretrained_models/LBAM_ParisStreetView' \ # path to the pretrained model ---imgfn 'imgs/paris/pic.png' \ # input picture. ---maskfn 'imgs/paris/mask.png' \ # mask. ---resultfn 'results/paris' # folder for the result. \ No newline at end of file diff --git a/PaddleCV/Research/inpainting-lbam/utility.py b/PaddleCV/Research/inpainting-lbam/utility.py deleted file mode 100644 index 63dabd41da256bd6b3d3a9adda269217e780689e..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/inpainting-lbam/utility.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Contains common utility functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import distutils.util -import numpy as np -import six -from paddle.fluid import core - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) diff --git a/PaddleCV/Research/landmark/README.md b/PaddleCV/Research/landmark/README.md deleted file mode 100644 index 460f5b4e2a8bd5b08fee6e2d6224cd0f9c1e0224..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/README.md +++ /dev/null @@ -1,88 +0,0 @@ -# Google Landmark Retrieval and Recognition 2019 -The Google Landmark Dataset V2 is currently the largest publicly image retrieval and recogntion dataset, including 4M training data, more than 100,000 query images and nearly 1M index data. The large amounts of images in training dataset is the driving force of the generalizability of machine learning models. Here, we release our trained models in Google Landmark 2019 Competition, the detail of our solution can refer to our paper [[link](https://arxiv.org/pdf/1906.03990.pdf)]. - -## Retrieval Models - -We fine-tune four convolutional neural networks to extract our global image descriptors. The four convolutional backbones include ResNet152, ResNet200, SE ResNeXt152 and InceptionV4. 
We choose arcmargin and npairs as our training loss, We train these models using Google Landmark V2 training set and index set. You can download trained models here. The training code can refer to metric learning [[link](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/metric_learning)]. - -|model | public | private -|- | - | -: -|[res152_arcmargin](https://landmark.gz.bcebos.com/res152_arcmargin.tar) | 0.2676 | 0.3020 -|[res152_arcmargin_index](https://landmark.gz.bcebos.com/res152_arcmargin_index.tar) | 0.2476 | 0.2707 -|[res152_npairs](https://landmark.gz.bcebos.com/res152_npairs.tar) | 0.2597 | 0.2870 -|[res200_arcmargin](https://landmark.gz.bcebos.com/res200_arcmargin.tar) | 0.2670 | 0.3042 -|[se_x152_arcmargin](https://landmark.gz.bcebos.com/se_x152_arcmargin.tar) | 0.2670 | 0.2914 -|[inceptionv4_arcmargin](https://landmark.gz.bcebos.com/inceptionv4_arcmargin.tar) | 0.2685 | 0.2933 - -In addition, we also train a classification model based on ResNet152 with ~4M Google Landmark V2 training set. ([res152_softmax_v1](https://landmark.gz.bcebos.com/res152_softmax_v1.tar)) -The taining code can refer to image classification [[link](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification)]. - -## Recognition Models - -There are three models in our recognition solution. - -1.[res152_arcmargin](https://landmark.gz.bcebos.com/res152_arcmargin.tar): Retrieval model based on Resnet152 and arcmargin which is the same as in the retrieval task. - -2.[res152_softmax_v2](https://landmark.gz.bcebos.com/res152_softmax_v2.tar): Classification model based on Resnet152 and softmax with ~3M Google Landmark V2 tidied training set. The training code can refer to image classification [[link](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification)]. - -3.[res50_oid_v4_detector](https://landmark.gz.bcebos.com/res50_oid_v4_detector.tar): Object detector model for the non-landmark images filtering. 
The mAP of this model is ~0.55 on the OID V4 track (public LB). The training code can refer to RCNN detector [[link](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/rcnn)]. - -## Environment - -Cudnn >= 7, CUDA 9, PaddlePaddle version >= 1.3, python version 2.7 - -## Inference - -### 1.Compile paddle infer so and predict with binary model - -There are two different type of models in PaddlePaddle: train model and binary model. Predict with the binary model is more efficient. Thus, at first we compile paddle infer so and convert train model to binary model. - -(1) Compile paddle infer so - -Please refer the README.md in pypredict. - -(2) Convert train model to binary model - -``` - pushd inference - sh convert.sh -``` - -### 2.Extract retrieval feature and calculate cosine distance - -In the folder ./inference/test_data, there are four images, 0.jpg and 1.jpg are same landmark images, 2.jpg is another landmark image, 3.jpg is a non-lamdnark image. - -We will extract the features of these images, and calculate the cosine distances between 0.jpg and 1.jpg, 2.jpg, 3.jpg. - -``` -pushd inference -. set_env.sh -python infer_retrieval.py test_retrieval model_name [res152_arcmargin, res152_arcmargin_index, res152_npairs, res200_arcmargin, se_x152_arcmargin, inceptionv4_arcmargin] - -example: - python infer_retrieval.py test_retrieval res152_arcmargin -popd -``` - -### 3.Predict the classification label of images - -``` -pushd inference -. set_env.sh -python infer_recognition.py test_cls img_path model_name [res152_softmax_v1, res152_softmax_v2] - -example: - python infer_recognition.py test_cls test_data/0.jpg res152_softmax_v1 -popd -``` -You will get the inference label and score. - -### 4.Detect images - -``` - pushd inference - . set_env.sh - python infer_recognition.py test_det ./test_data/2e44b31818acc600.jpeg -``` - -You will get the inference detetor bounding box and classes. 
The class mapping file: pretrained_models/res50_oid_v4_detector/cls_name_idx_map_openimagev4_500.txt diff --git a/PaddleCV/Research/landmark/inference/conf/paddle-cls.conf b/PaddleCV/Research/landmark/inference/conf/paddle-cls.conf deleted file mode 100644 index 65c5508ec816d06e1f049bfb42cd6bea3c5dc716..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/conf/paddle-cls.conf +++ /dev/null @@ -1,19 +0,0 @@ -# for c++ predict -[res152_softmax_v1_predict] -# set the used of GPU card -res152_softmax_v1_device : 0 -# set whether print the debug infor -res152_softmax_v1_debug : 0 -# set the initial ratio of the GPU memory -res152_softmax_v1_fraction_of_gpu_memory: 0.1 -# binary model structure -res152_softmax_v1_prog_file: ./binary_models/res152_softmax_v1/model -# binary model params -res152_softmax_v1_param_file: ./binary_models/res152_softmax_v1/params - -[res152_softmax_v2_predict] -res152_softmax_v2_device : 0 -res152_softmax_v2_debug : 0 -res152_softmax_v2_fraction_of_gpu_memory: 0.1 -res152_softmax_v2_prog_file: ./binary_models/res152_softmax_v2/model -res152_softmax_v2_param_file: ./binary_models/res152_softmax_v2/params diff --git a/PaddleCV/Research/landmark/inference/conf/paddle-det.conf b/PaddleCV/Research/landmark/inference/conf/paddle-det.conf deleted file mode 100644 index b0310efbbcc48f4a0e49668c305af90ccb4d3fcb..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/conf/paddle-det.conf +++ /dev/null @@ -1,20 +0,0 @@ -# for c++ predict -[paddle-classify_predict] -# set the used of GPU card -paddle-classify_device : 0 -# set whether print the debug infor -paddle-classify_debug : 0 -# set the initial ratio of the GPU memory -paddle-classify_fraction_of_gpu_memory: 0.1 -# binary model structure -paddle-classify_prog_file: ./pretrained_models/res50_oid_v4_detector/infer_model/model -# binary model params -paddle-classify_param_file: ./pretrained_models/res50_oid_v4_detector/infer_model/params - 
-[paddle-det] -#total detector class number -class_nums:501 -#infer image size -new_size:800 -#max infer image size -max_size:1333 diff --git a/PaddleCV/Research/landmark/inference/conf/paddle-retrieval.conf b/PaddleCV/Research/landmark/inference/conf/paddle-retrieval.conf deleted file mode 100644 index fc4ce182210bb1f7d1173da3a64c003d75a8431f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/conf/paddle-retrieval.conf +++ /dev/null @@ -1,54 +0,0 @@ -# for c++ predict -[res152_arcmargin_predict] -# set the used of GPU card -res152_arcmargin_device : 0 -# set whether print the debug infor -res152_arcmargin_debug : 0 -# set the initial ratio of the GPU memory -res152_arcmargin_fraction_of_gpu_memory: 0.1 -# binary model structure -res152_arcmargin_prog_file: ./binary_models/res152_arcmargin/model -# binary model params -res152_arcmargin_param_file: ./binary_models/res152_arcmargin/params -# input shape -input_size: 448 - -[res152_arcmargin_index_predict] -res152_arcmargin_index_device : 0 -res152_arcmargin_index_debug : 0 -res152_arcmargin_index_fraction_of_gpu_memory: 0.1 -res152_arcmargin_index_prog_file: ./binary_models/res152_arcmargin_index/model -res152_arcmargin_index_param_file: ./binary_models/res152_arcmargin_index/params -input_size: 448 - -[res152_npairs_predict] -res152_npairs_device : 0 -res152_npairs_debug : 0 -res152_npairs_fraction_of_gpu_memory: 0.1 -res152_npairs_prog_file: ./binary_models/res152_npairs/model -res152_npairs_param_file: ./binary_models/res152_npairs/params -input_size: 448 - -[res200_arcmargin_predict] -res200_arcmargin_device : 0 -res200_arcmargin_debug : 0 -res200_arcmargin_fraction_of_gpu_memory: 0.1 -res200_arcmargin_prog_file: ./binary_models/res200_arcmargin/model -res200_arcmargin_param_file: ./binary_models/res200_arcmargin/params -input_size: 448 - -[se_x152_arcmargin_predict] -se_x152_arcmargin_device : 0 -se_x152_arcmargin_debug : 0 -se_x152_arcmargin_fraction_of_gpu_memory: 0.1 
-se_x152_arcmargin_prog_file: ./binary_models/se_x152_arcmargin/model -se_x152_arcmargin_param_file: ./binary_models/se_x152_arcmargin/params -input_size: 448 - -[inceptionv4_arcmargin_predict] -inceptionv4_arcmargin_device : 0 -inceptionv4_arcmargin_debug : 0 -inceptionv4_arcmargin_fraction_of_gpu_memory: 0.1 -inceptionv4_arcmargin_prog_file: ./binary_models/inceptionv4_arcmargin/model -inceptionv4_arcmargin_param_file: ./binary_models/inceptionv4_arcmargin/params -input_size: 555 diff --git a/PaddleCV/Research/landmark/inference/convert.sh b/PaddleCV/Research/landmark/inference/convert.sh deleted file mode 100644 index b13256d776989c3092db593d20e7aa45efd84a13..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/convert.sh +++ /dev/null @@ -1,23 +0,0 @@ -#res152_softmax_v1 -python convert_binary_model.py --model='ResNet152_vd_fc' --pretrained_model=pretrained_models/res152_softmax_v1/ --binary_model=./binary_models/res152_softmax_v1 --image_shape=3,224,224 --task_mode='classification' - -#res152_softmax_v2 -python convert_binary_model.py --model='ResNet152_vd' --pretrained_model=pretrained_models/res152_softmax_v2/ --binary_model=./binary_models/res152_softmax_v2 --image_shape=3,224,224 --task_mode='classification' - -#res152_arcmargin -python convert_binary_model.py --model='ResNet152_vd_v0_embedding' --pretrained_model=pretrained_models/res152_arcmargin/ --binary_model=./binary_models/res152_arcmargin --image_shape=3,448,448 --task_mode='retrieval' - -#res152_arcmargin_index -python convert_binary_model.py --model='ResNet152_vd_v0_embedding' --pretrained_model=pretrained_models/res152_arcmargin_index/ --binary_model=./binary_models/res152_arcmargin_index --image_shape=3,448,448 --task_mode='retrieval' - -#res152_npairs -python convert_binary_model.py --model='ResNet152_vd_v0_embedding' --pretrained_model=pretrained_models/res152_npairs/ --binary_model=./binary_models/res152_npairs --image_shape=3,448,448 --task_mode='retrieval' - 
-#res200_arcmargin -python convert_binary_model.py --model='ResNet200_vd_embedding' --pretrained_model=pretrained_models/res200_arcmargin/ --binary_model=./binary_models/res200_arcmargin --image_shape=3,448,448 --task_mode='retrieval' - -#se_x152_arcmargin -python convert_binary_model.py --model='SE_ResNeXt152_64x4d_vd_embedding' --pretrained_model=pretrained_models/se_x152_arcmargin/ --binary_model=./binary_models/se_x152_arcmargin --image_shape=3,448,448 --task_mode='retrieval' - -#inceptionv4_arcmargin -python convert_binary_model.py --model='InceptionV4_embedding' --pretrained_model=pretrained_models/inceptionv4_arcmargin --binary_model=./binary_models/inceptionv4_arcmargin --image_shape=3,555,555 --task_mode='retrieval' diff --git a/PaddleCV/Research/landmark/inference/convert_binary_model.py b/PaddleCV/Research/landmark/inference/convert_binary_model.py deleted file mode 100644 index c655431495694069020449c3584838d82f4e52de..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/convert_binary_model.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import argparse -import functools -import paddle -import paddle.fluid as fluid -import models -from utility import add_arguments, print_arguments - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('model', str, "ResNet200_vd", "Set the network to use.") -add_arg('embedding_size', int, 512, "Embedding size.") -add_arg('image_shape', str, "3,448,448", "Input image size.") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('binary_model', str, None, "Set binary_model dir") -add_arg('task_mode', str, "retrieval", "Set task mode") -# yapf: enable - -model_list = [m for m in dir(models) if "__" not in m] - - -def convert(args): - # parameters from arguments - model_name = args.model - pretrained_model = args.pretrained_model - if not os.path.exists(pretrained_model): - print("pretrained_model doesn't exist!") - sys.exit(-1) - image_shape = [int(m) for m in args.image_shape.split(",")] - - assert model_name in model_list, "{} is not in lists: {}".format(args.model, - model_list) - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - - # model definition - model = models.__dict__[model_name]() - if args.task_mode == 'retrieval': - out = model.net(input=image, embedding_size=args.embedding_size) - else: - out = model.net(input=image) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - def if_exist(var): - return os.path.exists(os.path.join(pretrained_model, var.name)) - fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) - - fluid.io.save_inference_model( - dirname = args.binary_model, - feeded_var_names = ['image'], - target_vars = [out['embedding']] if args.task_mode == 'retrieval' else [out], - executor = exe, - 
main_program = None, - model_filename = 'model', - params_filename = 'params') - - print('input_name: {}'.format('image')) - print('output_name: {}'.format(out['embedding'].name)) if args.task_mode == 'retrieval' else ('output_name: {}'.format(out.name)) - print("convert done.") - - -def main(): - args = parser.parse_args() - print_arguments(args) - convert(args) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/Research/landmark/inference/infer_recognition.py b/PaddleCV/Research/landmark/inference/infer_recognition.py deleted file mode 100644 index 4240a20576da59aa93655ff810d41f588f335735..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/infer_recognition.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys -sys.path.append('./so') -import time - -import cv2 -import numpy as np - -from ConfigParser import ConfigParser -from PyCNNPredict import PyCNNPredict - -#infer detector -def det_preprocessor(im, new_size, max_size): - im = im.astype(np.float32, copy=False) - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - im = im[:, :, ::-1] - im = im / 255 - im -= img_mean - im /= img_std - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(new_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) - channel_swap = (2, 0, 1) #(batch, channel, height, width) - im = im.transpose(channel_swap) - return im, im_scale - -def nms(dets, thresh): - """nms""" - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - scores = dets[:, 4] - - areas = (x2 - x1 + 1) * (y2 - y1 + 1) - dt_num = dets.shape[0] - order = np.array(range(dt_num)) - - keep = [] - while order.size > 0: - i = order[0] - keep.append(i) - xx1 = np.maximum(x1[i], x1[order[1:]]) - yy1 = np.maximum(y1[i], y1[order[1:]]) - xx2 = np.minimum(x2[i], x2[order[1:]]) - yy2 = np.minimum(y2[i], y2[order[1:]]) - - w = np.maximum(0.0, xx2 - xx1 + 1) - h = np.maximum(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (areas[i] + areas[order[1:]] - inter) - inds = np.where(ovr <= thresh)[0] - order = order[inds + 1] - return keep - -def box_decoder(deltas, boxes, weights): - boxes = boxes.astype(deltas.dtype, copy=False) - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - wx, wy, ww, wh = weights - dx = deltas[:, 0::4] * wx - dy = deltas[:, 1::4] * wy - dw = deltas[:, 2::4] * ww - dh = 
deltas[:, 3::4] * wh - # Prevent sending too large values into np.exp() - clip_value = np.log(1000. / 16.) - dw = np.minimum(dw, clip_value) - dh = np.minimum(dh, clip_value) - pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = np.exp(dw) * widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 - # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 - return pred_boxes - -def clip_tiled_boxes(boxes, im_shape): - """Clip boxes to image boundaries. im_shape is [height, width] and boxes - has shape (N, 4 * num_tiled_boxes).""" - # x1 >= 0 - boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) - # y1 >= 0 - boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) - return boxes - -def get_dt_res_common(rpn_rois_v, confs_v, locs_v, class_nums, im_info, im_id): - dts_res = [] - if len(rpn_rois_v) == 0: - return None - variance_v = np.array([0.1, 0.1, 0.2, 0.2]) - img_height, img_width, img_scale = im_info - tmp_v = box_decoder(locs_v, rpn_rois_v, variance_v) - tmp_v = clip_tiled_boxes(tmp_v, [img_height, img_width]) - decoded_box_v = tmp_v / img_scale - - cls_boxes = [[] for _ in range(class_nums)] - for j in range(1, class_nums): - inds = np.where(confs_v[:, j] >= 0.1)[0] - scores_j = confs_v[inds, j] - rois_j = decoded_box_v[inds, j * 4:(j + 1) * 4] - dets_j = np.hstack((rois_j, 
scores_j[:, np.newaxis])).astype(np.float32, copy=False) - cls_rank = np.argsort(-dets_j[:, -1]) - dets_j = dets_j[cls_rank] - keep = nms(dets_j, 0.5) - nms_dets = dets_j[keep, :] - cls_boxes[j] = nms_dets - - # Limit to max_per_image detections **over all classes** - image_scores = np.hstack([cls_boxes[j][:, -1] for j in range(1, class_nums)]) - if len(image_scores) > 100: - image_thresh = np.sort(image_scores)[-100] - for j in range(1, class_nums): - keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0] - cls_boxes[j] = cls_boxes[j][keep, :] - for j in range(1, class_nums): - for dt in cls_boxes[j]: - xmin, ymin, xmax, ymax, score = dt.tolist() - w = xmax - xmin + 1 - h = ymax - ymin + 1 - bbox = [xmin, ymin, w, h] - dt_res = { - 'image_id': im_id, - 'category_id': j, - 'bbox': bbox, - 'score': score - } - dts_res.append(dt_res) - return dts_res - -def test_det(img_path): - conf_file = './conf/paddle-det.conf' - prefix = 'paddle-classify_' - conf = loadconfig(conf_file) - det_prefix = 'paddle-det' - class_nums = conf.getint(det_prefix, 'class_nums') - new_size = conf.getfloat(det_prefix, 'new_size') - max_size = conf.getfloat(det_prefix, 'max_size') - predictor = PyCNNPredict() - predictor.init(conf_file, prefix) - im = cv2.imread(img_path) - if im is None: - print("image doesn't exist!") - sys.exit(-1) - img_height_ori = im.shape[0] - img_width_ori = im.shape[1] - im, im_scale = det_preprocessor(im, new_size, max_size) - im_height = np.round(img_height_ori * im_scale) - im_width = np.round(img_width_ori * im_scale) - im_info = np.array([im_height, im_width, im_scale], dtype=np.float32) - im_data_shape = np.array([1, im.shape[0], im.shape[1], im.shape[2]]) - im_info_shape = np.array([1, 3]) - im = im.flatten().astype(np.float32) - im_info = im_info.flatten().astype(np.float32) - inputdatas = [im, im_info] - inputshapes = [im_data_shape.astype(np.int32), im_info_shape.astype(np.int32)] - for ino in range(2): - starttime = time.time() - res = 
predictor.predict(inputdatas, inputshapes, []) - rpn_rois_v = res[0][0].reshape(-1, 4) - confs_v = res[0][1].reshape(-1, class_nums) - locs_v = res[0][2].reshape(-1, class_nums * 4) - dts_res = get_dt_res_common(rpn_rois_v, confs_v, locs_v, class_nums, im_info, 0) - print("Time:%.3f" % (time.time() - starttime)) - print(dts_res) - -##infer cls -def normwidth(size, margin=32): - outsize = size // margin * margin - return outsize - - -def loadconfig(configurefile): - "load config from file" - config = ConfigParser() - config.readfp(open(configurefile, 'r')) - return config - - -def resize_short(img, target_size): - """ resize_short """ - percent = float(target_size) / min(img.shape[0], img.shape[1]) - resized_width = int(round(img.shape[1] * percent)) - resized_height = int(round(img.shape[0] * percent)) - - resized_width = normwidth(resized_width) - resized_height = normwidth(resized_height) - resized = cv2.resize(img, (resized_width, resized_height)) - return resized - - -def crop_image(img, target_size, center): - """ crop_image """ - height, width = img.shape[:2] - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = random.randint(0, width - size) - h_start = random.randint(0, height - size) - w_end = w_start + size - h_end = h_start + size - img = img[h_start:h_end, w_start:w_end, :] - return img - - -def cls_preprocessor(im, new_size): - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - - img = resize_short(im, 224) - img = crop_image(img, target_size=224, center=True) - - img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 - img_mean = np.array(img_mean).reshape((3, 1, 1)) - img_std = np.array(img_std).reshape((3, 1, 1)) - img -= img_mean - img /= img_std - return img - - -def test_cls(img_path, model_name): - conf_file = './conf/paddle-cls.conf' - prefix = model_name + "_" - conf = loadconfig(conf_file) - predictor = PyCNNPredict() - 
predictor.init(conf_file, prefix) - im = cv2.imread(img_path) - if im is None: - print("image doesn't exist!") - sys.exit(-1) - im = cls_preprocessor(im, 224) - im_data_shape = np.array([1, im.shape[0], im.shape[1], im.shape[2]]) - im = im.flatten().astype(np.float32) - inputdatas = [im] - inputshapes = [im_data_shape.astype(np.int32)] - for ino in range(5): - starttime = time.time() - res = predictor.predict(inputdatas, inputshapes, []) - print "Time:", time.time() - starttime - - result = res[0][0] - pred_label = np.argsort(result)[::-1][:1] - - print(pred_label) - print(result[pred_label]) - -if __name__ == "__main__": - if len(sys.argv)>1 : - func = getattr(sys.modules[__name__], sys.argv[1]) - func(*sys.argv[2:]) - else: - print >> sys.stderr,'tools.py command' diff --git a/PaddleCV/Research/landmark/inference/infer_retrieval.py b/PaddleCV/Research/landmark/inference/infer_retrieval.py deleted file mode 100644 index 97cb2515315df1266431bb6f2f3e78ae72da856e..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/infer_retrieval.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import sys -sys.path.append('./so') -import time - -import cv2 -import numpy as np - -from ConfigParser import ConfigParser -from PyCNNPredict import PyCNNPredict - -def normwidth(size, margin=32): - outsize = size // margin * margin - outsize = max(outsize, margin) - return outsize - -def loadconfig(configurefile): - "load config from file" - config = ConfigParser() - config.readfp(open(configurefile, 'r')) - return config - -def resize_short(img, target_size): - """ resize_short """ - percent = float(target_size) / min(img.shape[0], img.shape[1]) - resized_width = int(round(img.shape[1] * percent)) - resized_height = int(round(img.shape[0] * percent)) - - resized_width = normwidth(resized_width) - resized_height = normwidth(resized_height) - resized = cv2.resize(img, (resized_width, resized_height), interpolation=cv2.INTER_LANCZOS4) - return resized - -def crop_image(img, target_size, center): - """ crop_image """ - height, width = img.shape[:2] - size = target_size - if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 - else: - w_start = random.randint(0, width - size) - h_start = random.randint(0, height - size) - w_end = w_start + size - h_end = h_start + size - img = img[h_start:h_end, w_start:w_end, :] - return img - - -def preprocessor(img, crop_size): - img_mean = [0.485, 0.456, 0.406] - img_std = [0.229, 0.224, 0.225] - - h, w = img.shape[:2] - ratio = float(max(w, h)) / min(w, h) - if ratio > 3: - crop_size = int(crop_size * 3 / ratio) - img = resize_short(img, crop_size) - - img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 - img_mean = np.array(img_mean).reshape((3, 1, 1)) - img_std = np.array(img_std).reshape((3, 1, 1)) - img -= img_mean - img /= img_std - - return img - -def cosinedist(a, b): - return np.dot(a, b) / (np.sum(a * a) * np.sum(b * b))**0.5 - -def test_retrieval(model_name): - conf_file = './conf/paddle-retrieval.conf' - prefix = model_name + "_" - config = loadconfig(conf_file) - 
predictor = PyCNNPredict() - predictor.init(conf_file, prefix) - input_size = config.getint(prefix + 'predict', 'input_size') - - img_names = [ - './test_data/0.jpg', - './test_data/1.jpg', - './test_data/2.jpg', - './test_data/3.jpg' - ] - img_feas = [] - for img_path in img_names: - im = cv2.imread(img_path) - if im is None: - return None - im = preprocessor(im, input_size) - im_data_shape = np.array([1, im.shape[0], im.shape[1], im.shape[2]]) - im = im.flatten().astype(np.float32) - inputdatas = [im] - inputshapes = [im_data_shape.astype(np.int32)] - run_time = 0 - starttime = time.time() - res = predictor.predict(inputdatas, inputshapes, []) - run_time += (time.time() - starttime) - fea = res[0][0] - img_feas.append(fea) - print("Time:", run_time) - - for i in xrange(len(img_names)-1): - cosdist = cosinedist(img_feas[0], img_feas[i+1]) - cosdist = max(min(cosdist, 1), 0) - print('cosine dist between {} and {}: {}'.format(0, i+1, cosdist)) - -if __name__ == "__main__": - if len(sys.argv)>1 : - func = getattr(sys.modules[__name__], sys.argv[1]) - func(*sys.argv[2:]) - else: - print >> sys.stderr,'tools.py command' diff --git a/PaddleCV/Research/landmark/inference/models/__init__.py b/PaddleCV/Research/landmark/inference/models/__init__.py deleted file mode 100644 index 4427196bc9df54f23cebfaddf5c2a920ee103b91..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .inceptionv4_embedding import InceptionV4_embedding -from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd -from .resnet_vd_embedding import ResNet50_vd_embedding, ResNet101_vd_embedding, ResNet152_vd_embedding, ResNet200_vd_embedding -from .resnet_vd_fc import ResNet50_vd_fc, ResNet101_vd_fc, ResNet152_vd_fc -from .resnet_vd_v0_embedding import ResNet50_vd_v0_embedding, ResNet101_vd_v0_embedding, ResNet152_vd_v0_embedding -from .se_resnext_vd_embedding import SE_ResNeXt50_32x4d_vd_embedding, 
SE_ResNeXt101_32x4d_vd_embedding, SE_ResNeXt152_64x4d_vd_embedding diff --git a/PaddleCV/Research/landmark/inference/models/inceptionv4_embedding.py b/PaddleCV/Research/landmark/inference/models/inceptionv4_embedding.py deleted file mode 100644 index 625780bd5fbdaaf826aa80f36c49711eae24eb31..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/inceptionv4_embedding.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle -import paddle.fluid as fluid -import math -from paddle.fluid.param_attr import ParamAttr -__all__ = ['InceptionV4_embedding'] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [10, 16, 20], - "steps": [0.01, 0.001, 0.0001, 0.00001] - } -} -class InceptionV4_embedding(): - def __init__(self): - self.params = train_parameters - def net(self, input, embedding_size=256): - endpoints = {} - x = self.inception_stem(input) - for i in range(4): - x = self.inceptionA(x,name=str(i+1)) - x = self.reductionA(x) - for i in range(7): - x = self.inceptionB(x,name=str(i+1)) - x = self.reductionB(x) - for i in range(3): - x = self.inceptionC(x,name=str(i+1)) - pool = fluid.layers.pool2d( - input=x, pool_size=8, pool_type='avg', global_pooling=True) - if embedding_size > 0: - embedding = fluid.layers.fc(input=pool, size=embedding_size) - endpoints['embedding'] = embedding - else: - endpoints['embedding'] = pool - return endpoints - def conv_bn_layer(self, - data, - num_filters, - filter_size, - stride=1, - padding=0, - groups=1, - act='relu',name=None): - conv = fluid.layers.conv2d( - input=data, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name) - bn_name = name+"_bn" - return fluid.layers.batch_norm(input=conv, act=act,name=bn_name, - param_attr = ParamAttr(name=bn_name+"_scale"), - bias_attr=ParamAttr(name=bn_name+"_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - def inception_stem(self, data, name=None): - conv = self.conv_bn_layer(data, 32, 3, stride=2, act='relu', name="conv1_3x3_s2") - conv = self.conv_bn_layer(conv, 32, 3, act='relu', name="conv2_3x3_s1") - conv = self.conv_bn_layer(conv, 64, 3, 
padding=1, act='relu', name="conv3_3x3_s1") - pool1 = fluid.layers.pool2d( - input=conv, pool_size=3, pool_stride=2, pool_type='max') - conv2 = self.conv_bn_layer(conv, 96, 3, stride=2, act='relu',name="inception_stem1_3x3_s2") - concat = fluid.layers.concat([pool1, conv2], axis=1) - conv1 = self.conv_bn_layer(concat, 64, 1, act='relu',name="inception_stem2_3x3_reduce") - conv1 = self.conv_bn_layer(conv1, 96, 3, act='relu',name="inception_stem2_3x3") - conv2 = self.conv_bn_layer(concat, 64, 1, act='relu',name="inception_stem2_1x7_reduce") - conv2 = self.conv_bn_layer( - conv2, 64, (7, 1), padding=(3, 0), act='relu',name="inception_stem2_1x7") - conv2 = self.conv_bn_layer( - conv2, 64, (1, 7), padding=(0, 3), act='relu',name="inception_stem2_7x1") - conv2 = self.conv_bn_layer(conv2, 96, 3, act='relu',name="inception_stem2_3x3_2") - concat = fluid.layers.concat([conv1, conv2], axis=1) - conv1 = self.conv_bn_layer(concat, 192, 3, stride=2, act='relu',name="inception_stem3_3x3_s2") - pool1 = fluid.layers.pool2d( - input=concat, pool_size=3, pool_stride=2, pool_type='max') - concat = fluid.layers.concat([conv1, pool1], axis=1) - return concat - def inceptionA(self, data, name=None): - pool1 = fluid.layers.pool2d( - input=data, pool_size=3, pool_padding=1, pool_type='avg') - conv1 = self.conv_bn_layer(pool1, 96, 1, act='relu',name="inception_a"+name+"_1x1") - conv2 = self.conv_bn_layer(data, 96, 1, act='relu',name="inception_a"+name+"_1x1_2") - conv3 = self.conv_bn_layer(data, 64, 1, act='relu', name="inception_a"+name+"_3x3_reduce") - conv3 = self.conv_bn_layer(conv3, 96, 3, padding=1, act='relu', name="inception_a"+name+"_3x3") - conv4 = self.conv_bn_layer(data, 64, 1, act='relu', name="inception_a"+name+"_3x3_2_reduce") - conv4 = self.conv_bn_layer(conv4, 96, 3, padding=1, act='relu', name="inception_a"+name+"_3x3_2") - conv4 = self.conv_bn_layer(conv4, 96, 3, padding=1, act='relu',name="inception_a"+name+"_3x3_3") - concat = fluid.layers.concat([conv1, conv2, conv3, 
conv4], axis=1) - return concat - def reductionA(self, data, name=None): - pool1 = fluid.layers.pool2d( - input=data, pool_size=3, pool_stride=2, pool_type='max') - conv2 = self.conv_bn_layer(data, 384, 3, stride=2, act='relu',name="reduction_a_3x3") - conv3 = self.conv_bn_layer(data, 192, 1, act='relu',name="reduction_a_3x3_2_reduce") - conv3 = self.conv_bn_layer(conv3, 224, 3, padding=1, act='relu', name="reduction_a_3x3_2") - conv3 = self.conv_bn_layer(conv3, 256, 3, stride=2, act='relu',name="reduction_a_3x3_3") - concat = fluid.layers.concat([pool1, conv2, conv3], axis=1) - return concat - def inceptionB(self, data, name=None): - pool1 = fluid.layers.pool2d( - input=data, pool_size=3, pool_padding=1, pool_type='avg') - conv1 = self.conv_bn_layer(pool1, 128, 1, act='relu',name="inception_b"+name+"_1x1") - conv2 = self.conv_bn_layer(data, 384, 1, act='relu', name="inception_b"+name+"_1x1_2") - conv3 = self.conv_bn_layer(data, 192, 1, act='relu',name="inception_b"+name+"_1x7_reduce") - conv3 = self.conv_bn_layer( - conv3, 224, (1, 7), padding=(0, 3), act='relu',name="inception_b"+name+"_1x7") - conv3 = self.conv_bn_layer( - conv3, 256, (7, 1), padding=(3, 0), act='relu',name="inception_b"+name+"_7x1") - conv4 = self.conv_bn_layer(data, 192, 1, act='relu',name="inception_b"+name+"_7x1_2_reduce") - conv4 = self.conv_bn_layer( - conv4, 192, (1, 7), padding=(0, 3), act='relu',name="inception_b"+name+"_1x7_2") - conv4 = self.conv_bn_layer( - conv4, 224, (7, 1), padding=(3, 0), act='relu',name="inception_b"+name+"_7x1_2") - conv4 = self.conv_bn_layer( - conv4, 224, (1, 7), padding=(0, 3), act='relu',name="inception_b"+name+"_1x7_3") - conv4 = self.conv_bn_layer( - conv4, 256, (7, 1), padding=(3, 0), act='relu',name="inception_b"+name+"_7x1_3") - concat = fluid.layers.concat([conv1, conv2, conv3, conv4], axis=1) - return concat - def reductionB(self, data, name=None): - pool1 = fluid.layers.pool2d( - input=data, pool_size=3, pool_stride=2, pool_type='max') - conv2 = 
self.conv_bn_layer(data, 192, 1, act='relu',name="reduction_b_3x3_reduce") - conv2 = self.conv_bn_layer(conv2, 192, 3, stride=2, act='relu',name="reduction_b_3x3") - conv3 = self.conv_bn_layer(data, 256, 1, act='relu',name="reduction_b_1x7_reduce") - conv3 = self.conv_bn_layer( - conv3, 256, (1, 7), padding=(0, 3), act='relu',name="reduction_b_1x7") - conv3 = self.conv_bn_layer( - conv3, 320, (7, 1), padding=(3, 0), act='relu',name="reduction_b_7x1") - conv3 = self.conv_bn_layer(conv3, 320, 3, stride=2, act='relu',name="reduction_b_3x3_2") - concat = fluid.layers.concat([pool1, conv2, conv3], axis=1) - return concat - def inceptionC(self, data, name=None): - pool1 = fluid.layers.pool2d( - input=data, pool_size=3, pool_padding=1, pool_type='avg') - conv1 = self.conv_bn_layer(pool1, 256, 1, act='relu',name="inception_c"+name+"_1x1") - conv2 = self.conv_bn_layer(data, 256, 1, act='relu',name="inception_c"+name+"_1x1_2") - conv3 = self.conv_bn_layer(data, 384, 1, act='relu',name="inception_c"+name+"_1x1_3") - conv3_1 = self.conv_bn_layer( - conv3, 256, (1, 3), padding=(0, 1), act='relu',name="inception_c"+name+"_1x3") - conv3_2 = self.conv_bn_layer( - conv3, 256, (3, 1), padding=(1, 0), act='relu',name="inception_c"+name+"_3x1") - conv4 = self.conv_bn_layer(data, 384, 1, act='relu',name="inception_c"+name+"_1x1_4") - conv4 = self.conv_bn_layer( - conv4, 448, (1, 3), padding=(0, 1), act='relu',name="inception_c"+name+"_1x3_2") - conv4 = self.conv_bn_layer( - conv4, 512, (3, 1), padding=(1, 0), act='relu',name="inception_c"+name+"_3x1_2") - conv4_1 = self.conv_bn_layer( - conv4, 256, (1, 3), padding=(0, 1), act='relu',name="inception_c"+name+"_1x3_3") - conv4_2 = self.conv_bn_layer( - conv4, 256, (3, 1), padding=(1, 0), act='relu',name="inception_c"+name+"_3x1_3") - concat = fluid.layers.concat( - [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], axis=1) - return concat \ No newline at end of file diff --git a/PaddleCV/Research/landmark/inference/models/resnet_vd.py 
b/PaddleCV/Research/landmark/inference/models/resnet_vd.py deleted file mode 100644 index 44402a919343daeffb42f6b7d2959fa3dc237bf8..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/resnet_vd.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -import math -__all__ = ["ResNet", "ResNet50_vd","ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 45, 55], - "steps": [0.01, 0.001, 0.0001, 0.00001] - } -} -class ResNet(): - def __init__(self, layers=50, is_3x3 = False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = 
self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - else: - conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name="res"+str(block+2)+"a" - else: - conv_name="res"+str(block+2)+"b"+str(i) - else: - conv_name="res"+str(block+2)+chr(97+i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block==0, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, - stdv))) - - - softmaxout = fluid.layers.softmax(input=out) - return softmaxout - - - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - 
moving_variance_name=bn_name + '_variance') - - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) - else: - return input - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name+"_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3 = True) - return model -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3 = True) - return model -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3 = 
True) - return model -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3 = True) - return model diff --git a/PaddleCV/Research/landmark/inference/models/resnet_vd_embedding.py b/PaddleCV/Research/landmark/inference/models/resnet_vd_embedding.py deleted file mode 100644 index ab938426c6684ed9f49236d0ed6c4b7270af72bb..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/resnet_vd_embedding.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -import math -__all__ = ["ResNet", "ResNet50_vd_embedding","ResNet101_vd_embedding", "ResNet152_vd_embedding", "ResNet200_vd_embedding"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} -class ResNet(): - def __init__(self, layers=50, is_3x3 = False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - def net(self, input, embedding_size=256): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - endpoints = {} - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - else: - conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name="res"+str(block+2)+"a" - else: - conv_name="res"+str(block+2)+"b"+str(i) - else: - conv_name="res"+str(block+2)+chr(97+i) - conv = 
self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block==0, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, pool_size=14, pool_type='avg', global_pooling=True) - if embedding_size > 0: - embedding = fluid.layers.fc(input=pool, size=embedding_size) - endpoints['embedding'] = embedding - else: - endpoints['embedding'] = pool - return endpoints - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if 
ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) - else: - return input - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name+"_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - -def ResNet50_vd_embedding(): - model = ResNet(layers=50, is_3x3 = True) - return model -def ResNet101_vd_embedding(): - model = ResNet(layers=101, is_3x3 = True) - return model -def ResNet152_vd_embedding(): - model = ResNet(layers=152, is_3x3 = True) - return model -def ResNet200_vd_embedding(): - model = ResNet(layers=200, is_3x3 = True) - return model \ No newline at end of file diff --git a/PaddleCV/Research/landmark/inference/models/resnet_vd_fc.py b/PaddleCV/Research/landmark/inference/models/resnet_vd_fc.py deleted file mode 100644 index 6ae38172e6b0b5caebc8a8a21e974de6dd47923a..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/resnet_vd_fc.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -import math -__all__ = ["ResNet", "ResNet50_vd_fc", "ResNet101_vd_fc", "ResNet152_vd_fc"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} -class ResNet(object): - """ResNet""" - def __init__(self, layers=50, is_3x3=False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - def net(self, input, class_dim=1000): - """net""" - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - else: - conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - 
pool_stride=2, - pool_padding=1, - pool_type='max') - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name="res" + str(block + 2) + "a" - else: - conv_name="res" + str(block + 2) + "b" + str(i) - else: - conv_name="res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - fcresult = fluid.layers.fc(input=pool, size=256) - stdv = 1.0 / math.sqrt(fcresult.shape[1] * 1.0) - out = fluid.layers.fc(input=fcresult, size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - #return out - softmaxout = fluid.layers.softmax(input=out) - return softmaxout - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - """conv_bn_layer""" - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - """conv_bn_layer_new""" - pool = fluid.layers.pool2d(input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - 
padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - """shortcut""" - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) - else: - return input - def bottleneck_block(self, input, num_filters, stride, name, if_first): - """bottleneck_block""" - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, - filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer(input=conv1, num_filters=num_filters * 4, - filter_size=1, act=None, name=name + "_branch2c") - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - -def ResNet50_vd_fc(): - """ResNet50_vd""" - model = ResNet(layers=50, is_3x3 = True) - return model -def ResNet101_vd_fc(): - """ResNet101_vd""" - model = ResNet(layers=101, is_3x3 = True) - return model -def ResNet152_vd_fc(): - """ResNet152_vd""" - model = ResNet(layers=152, is_3x3 = True) - return model diff --git a/PaddleCV/Research/landmark/inference/models/resnet_vd_v0_embedding.py b/PaddleCV/Research/landmark/inference/models/resnet_vd_v0_embedding.py deleted file mode 100644 index 
6e5b42886443ba5df17eae0635f5a76e17f504d5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/resnet_vd_v0_embedding.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import paddle -import paddle.fluid as fluid -import math -__all__ = ["ResNet_vd", "ResNet50_vd_v0_embedding","ResNet101_vd_v0_embedding", "ResNet152_vd_v0_embedding"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} -class ResNet_vd(): - def __init__(self, layers=50, is_3x3 = False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - def net(self, input, embedding_size=256): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - endpoints = {} - if is_3x3 == False: - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - else: - conv = self.conv_bn_layer( - 
input=input, num_filters=32, filter_size=3, stride=2, act='relu') - conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - for block in range(len(depth)): - for i in range(depth[block]): - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block==0) - pool = fluid.layers.pool2d( - input=conv, pool_size=14, pool_type='avg', global_pooling=True) - if embedding_size > 0: - embedding = fluid.layers.fc(input=pool, size=embedding_size) - endpoints['embedding'] = embedding - else: - endpoints['embedding'] = pool - return endpoints - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act) - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None): - pool = fluid.layers.pool2d(input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act) - - def shortcut(self, input, ch_out, stride, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride) - else: - return self.conv_bn_layer_new(input, ch_out, 1, stride) - else: - return input - def bottleneck_block(self, 
input, num_filters, stride, if_first): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None) - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first) - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - -def ResNet50_vd_v0_embedding(): - model = ResNet_vd(layers=50, is_3x3 = True) - return model -def ResNet101_vd_v0_embedding(): - model = ResNet_vd(layers=101, is_3x3 = True) - return model -def ResNet152_vd_v0_embedding(): - model = ResNet_vd(layers=152, is_3x3 = True) - return model \ No newline at end of file diff --git a/PaddleCV/Research/landmark/inference/models/se_resnext_vd_embedding.py b/PaddleCV/Research/landmark/inference/models/se_resnext_vd_embedding.py deleted file mode 100644 index 629dee0170e1e2160b360ea8db74bd7cca713bf8..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/models/se_resnext_vd_embedding.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle -import paddle.fluid as fluid -import math -from paddle.fluid.param_attr import ParamAttr -__all__ = [ - "SE_ResNeXt", "SE_ResNeXt50_32x4d_vd_embedding", "SE_ResNeXt101_32x4d_vd_embedding", - "SE_ResNeXt152_64x4d_vd_embedding" -] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [10, 16, 20], - "steps": [0.01, 0.001, 0.0001, 0.00001] - } -} -class SE_ResNeXt(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers - def net(self, input, embedding_size=256): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - endpoints = {} - if layers == 50: - cardinality = 32 - reduction_ratio = 16 - depth = [3, 4, 6, 3] - num_filters = [128, 256, 512, 1024] - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer( - input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - elif layers == 101: - cardinality = 32 - reduction_ratio = 16 - depth = [3, 4, 23, 3] - num_filters = [128, 256, 512, 1024] - conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer( - input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - 
pool_padding=1, - pool_type='max') - elif layers == 152: - cardinality = 64 - reduction_ratio = 16 - depth = [3, 8, 36, 3] - num_filters = [256, 512, 1024, 2048] - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=3, - stride=2, - act='relu', - name='conv1_1') - conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu',name='conv1_2') - conv = self.conv_bn_layer( - input=conv, - num_filters=128, - filter_size=3, - stride=1, - act='relu', - name='conv1_3') - conv = fluid.layers.pool2d( - input=conv, pool_size=3, pool_stride=2, pool_padding=1, \ - pool_type='max') - n = 1 if layers == 50 or layers == 101 else 3 - for block in range(len(depth)): - n += 1 - for i in range(depth[block]): - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio, - if_first=block==0, - name=str(n)+'_'+str(i+1)) - pool = fluid.layers.pool2d( - input=conv, pool_size=14, pool_type='avg', global_pooling=True) - if embedding_size > 0: - embedding = fluid.layers.fc(input=pool, size=embedding_size) - endpoints['embedding'] = embedding - else: - endpoints['embedding'] = pool - return endpoints - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - filter_size = 1 - if if_first: - return self.conv_bn_layer(input, ch_out, filter_size, stride, name='conv'+name+'_prj') - else: - return self.conv_bn_layer_new(input, ch_out, filter_size, stride, name='conv'+name+'_prj') - else: - return input - def bottleneck_block(self, input, num_filters, stride, cardinality, - reduction_ratio,if_first, name=None): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name='conv'+name+'_x1') - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, 
- act='relu', - name='conv'+name+'_x2') - if cardinality == 64: - num_filters = num_filters / 2 - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 2, filter_size=1, act=None, name='conv'+name+'_x3') - scale = self.squeeze_excitation( - input=conv2, - num_channels=num_filters * 2, - reduction_ratio=reduction_ratio, - name='fc'+name) - short = self.shortcut(input, num_filters * 2, stride, if_first=if_first, name=name) - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - bias_attr=False, - param_attr=ParamAttr(name=name + '_weights'), - ) - bn_name = name + "_bn" - return fluid.layers.batch_norm(input=conv, act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - bn_name = name + "_bn" - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def squeeze_excitation(self, input, num_channels, reduction_ratio, name=None): - pool = fluid.layers.pool2d( - input=input, pool_size=0, 
pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = fluid.layers.fc(input=pool, - size=num_channels / reduction_ratio, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform( - -stdv, stdv),name=name+'_sqz_weights'), - bias_attr=ParamAttr(name=name+'_sqz_offset')) - stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc(input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform( - -stdv, stdv),name=name+'_exc_weights'), - bias_attr=ParamAttr(name=name+'_exc_offset')) - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale -def SE_ResNeXt50_32x4d_vd_embedding(): - model = SE_ResNeXt(layers=50) - return model -def SE_ResNeXt101_32x4d_vd_embedding(): - model = SE_ResNeXt(layers=101) - return model -def SE_ResNeXt152_64x4d_vd_embedding(): - model = SE_ResNeXt(layers=152) - return model diff --git a/PaddleCV/Research/landmark/inference/set_env.sh b/PaddleCV/Research/landmark/inference/set_env.sh deleted file mode 100644 index 4e9fd43c03fdd55858d763773ba156ee2e33e249..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/set_env.sh +++ /dev/null @@ -1,3 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0 -export FLAGS_fraction_of_gpu_memory_to_use=0.8 -export LD_LIBRARY_PATH=./so:$LD_LIBRARY_PATH diff --git a/PaddleCV/Research/landmark/inference/test_data/0.jpg b/PaddleCV/Research/landmark/inference/test_data/0.jpg deleted file mode 100644 index f72999456bd6f7cdda065b0e8ed03f83e2b2db74..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/landmark/inference/test_data/0.jpg and /dev/null differ diff --git a/PaddleCV/Research/landmark/inference/test_data/1.jpg b/PaddleCV/Research/landmark/inference/test_data/1.jpg deleted file mode 100644 index 227a9b210c7073d3470e0c4d591b69351625e51b..0000000000000000000000000000000000000000 
Binary files a/PaddleCV/Research/landmark/inference/test_data/1.jpg and /dev/null differ diff --git a/PaddleCV/Research/landmark/inference/test_data/2.jpg b/PaddleCV/Research/landmark/inference/test_data/2.jpg deleted file mode 100644 index ec85039d53cfb65e159231501e568eb6dbdf43c9..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/landmark/inference/test_data/2.jpg and /dev/null differ diff --git a/PaddleCV/Research/landmark/inference/test_data/2e44b31818acc600.jpeg b/PaddleCV/Research/landmark/inference/test_data/2e44b31818acc600.jpeg deleted file mode 100644 index a95cf23758c3fbb2de0ffd8309e4e1bf9fd53151..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/landmark/inference/test_data/2e44b31818acc600.jpeg and /dev/null differ diff --git a/PaddleCV/Research/landmark/inference/test_data/3.jpg b/PaddleCV/Research/landmark/inference/test_data/3.jpg deleted file mode 100644 index 5988cfa14a44810c16630c98910ca9a99dabb830..0000000000000000000000000000000000000000 Binary files a/PaddleCV/Research/landmark/inference/test_data/3.jpg and /dev/null differ diff --git a/PaddleCV/Research/landmark/inference/utility.py b/PaddleCV/Research/landmark/inference/utility.py deleted file mode 100644 index 9fb64161fbdd2198cc2f74ed9abf2dc82ce23728..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/inference/utility.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import time -import os -import subprocess -import distutils.util -import numpy as np -from paddle.fluid import core - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -def fmt_time(): - """ get formatted time for now - """ - now_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) - return now_str - - -def get_gpunum(): - """ get number of gpu devices - """ - visibledevice = os.getenv('CUDA_VISIBLE_DEVICES') - if visibledevice: - devicenum = len(visibledevice.split(',')) - else: - devicenum = subprocess.check_output(['nvidia-smi', '-L']).count('\n') - return devicenum - - diff --git a/PaddleCV/Research/landmark/pypredict/Makefile b/PaddleCV/Research/landmark/pypredict/Makefile deleted file mode 100644 index e829e671a70628b75b76eacddd02615e753fc522..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -.PHONY:clean - -LDFLAGS = -fPIC -shared -Wl,--rpath=\$ORIGIN -Wl,--rpath=\$ORIGIN/../so -Wl,-z,origin - -LIBS = ./so/libpaddle_fluid.so ./so/libmkldnn.so.0 ./so/libmklml_intel.so ./so/libiomp5.so -L $(PYTHONHOME)/lib -lpython2.7 - -CFLAGS= -fPIC -I pybind11/include -I ./fluid_inference/third_party/install/glog/include -I./fluid_inference/third_party/install/gflags/include -I ./fluid_inference/paddle/include/ -I. 
-I./fluid_inference/third_party/boost/ -I$(PYTHONHOME)/include/python2.7 -std=c++11 -OBJS = predictor.o\ - py_cnnpredict.o\ - conf_parser.o - -PyCNNPredict.so: $(OBJS) - g++ $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) -%.o : %.cpp - g++ $(CFLAGS) -c $^ -o $@ - -clean: - rm -rf $(OBJS) PyCNNPredict.so - \ No newline at end of file diff --git a/PaddleCV/Research/landmark/pypredict/README.md b/PaddleCV/Research/landmark/pypredict/README.md deleted file mode 100644 index 50a2b44fbd320149d425be73f7ecef16e4f564d2..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Accelerated Infer Project - -This project is to accelerate the prediction of cnn.We need to compile theproject. - -## Environment - -Python2.7, Numpy - -## Compile The Entire C++ Project - -first open build.sh, and you need to set PYTHONHOME nev in build.sh -``` - export PYTHONHOME=/your/python/home - sh build.sh -``` - -so folder will appear, This is the c++ program used to speed up the prediction. 
-then you can copy the so file to ../inference to predict models -``` - mv so ../inference -``` diff --git a/PaddleCV/Research/landmark/pypredict/build.sh b/PaddleCV/Research/landmark/pypredict/build.sh deleted file mode 100644 index cd216440c3023bf8bed2df2c7bc2e4b5c334d556..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/build.sh +++ /dev/null @@ -1,19 +0,0 @@ -set -x -#http://www.paddlepaddle.org/documentation/docs/en/1.4/advanced_usage/deploy/inference/build_and_install_lib_en.html -alias wget='wget --no-check-certificat ' -alias git="/usr/bin/git" - -#set python home -export PYTHONHOME=~/.jumbo - -wget https://paddle-inference-lib.bj.bcebos.com/1.4.1-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz -tar -xzf fluid_inference.tgz -mkdir so -cp `find fluid_inference -name '*.so*'` so/ - -git clone https://github.com/pybind/pybind11.git -cd pybind11 && git checkout v2.2.4 && cd - - -make - -mv PyCNNPredict.so so diff --git a/PaddleCV/Research/landmark/pypredict/cnnpredict_interface.h b/PaddleCV/Research/landmark/pypredict/cnnpredict_interface.h deleted file mode 100644 index c8cb4fd341f39c83c26ccb2e59ae7f5b0073d401..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/cnnpredict_interface.h +++ /dev/null @@ -1,119 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -enum class DataType : int { - INT8 = 0, - INT32 = 2, - INT64 = 3, - FLOAT32 = 4, -}; - -inline size_t get_type_size(DataType type) { - switch (type) { - case DataType::INT8: - return sizeof(int8_t); - case DataType::INT32: - return sizeof(int32_t); - case DataType::INT64: - return sizeof(int64_t); - case DataType::FLOAT32: - return sizeof(float); - default: - return 0; - } -} - -struct DataBuf { - std::size_t size; - DataType type; - std::shared_ptr data; - - DataBuf() = default; - - DataBuf(DataType dtype, size_t dsize) { alloc(dtype, dsize); } - - DataBuf(const void *ddata, DataType dtype, size_t dsize) { - alloc(dtype, 
dsize); - copy(ddata, dsize); - } - - DataBuf(const DataBuf &dbuf) - : size(dbuf.size), type(dbuf.type), data(dbuf.data) {} - - DataBuf &operator=(const DataBuf &dbuf) { - size = dbuf.size; - type = dbuf.type; - data = dbuf.data; - return *this; - } - - void reset(const void *ddata, size_t dsize) { - clear(); - alloc(type, dsize); - copy(ddata, dsize); - } - - void clear() { - size = 0; - data.reset(); - } - - ~DataBuf() { clear(); } - - private: - void alloc(DataType dtype, size_t dsize) { - type = dtype; - size = dsize; - data.reset(new char[dsize * get_type_size(dtype)], - std::default_delete()); - } - - void copy(const void *ddata, size_t dsize) { - const char *temp = reinterpret_cast(ddata); - std::copy(temp, temp + dsize * get_type_size(type), data.get()); - } -}; - -struct Tensor { - std::string name; - std::vector shape; - std::vector> lod; - DataBuf data; -}; - -class ICNNPredict { - public: - ICNNPredict() {} - virtual ~ICNNPredict() {} - - virtual ICNNPredict *clone() = 0; - - virtual bool predict(const std::vector &inputs, - const std::vector &layers, - std::vector &outputs) = 0; - - virtual bool predict(const std::vector> &input_datas, - const std::vector> &input_shapes, - const std::vector &layers, - std::vector> &output_datas, - std::vector> &output_shapes) = 0; - - virtual void destroy(std::vector &tensors) { - std::vector().swap(tensors); - } - - virtual void destroy(std::vector> &datas) { - std::vector>().swap(datas); - } - - virtual void destroy(std::vector> &shapes) { - std::vector>().swap(shapes); - } -}; - -ICNNPredict *create_cnnpredict(const std::string &conf_file, - const std::string &prefix); diff --git a/PaddleCV/Research/landmark/pypredict/common.h b/PaddleCV/Research/landmark/pypredict/common.h deleted file mode 100644 index accdc9cf97f491fce45ce12f9ab89c57015769ab..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/common.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include -#include 
-#include -#include - -static inline bool file_exist(const std::string &file_name) { - return ((access(file_name.c_str(), 0)) != -1) ? true : false; -} - -template -static inline bool str2num(const std::string &str, T &num) { - std::istringstream istr(str); - istr >> num; - return !istr.fail(); -}; - -template -static bool strs2nums(const std::vector &strs, - std::vector &nums) { - nums.resize(strs.size()); - for (size_t i = 0; i < strs.size(); i++) { - if (!str2num(strs[i], nums[i])) { - nums.clear(); - return false; - } - } - - return true; -}; - -template -static inline std::string num2str(T a) { - std::stringstream istr; - istr << a; - return istr.str(); -} diff --git a/PaddleCV/Research/landmark/pypredict/conf_parser.cpp b/PaddleCV/Research/landmark/pypredict/conf_parser.cpp deleted file mode 100644 index 2debb3c87c6a4b53ebd346b1638f036e0acf3ac5..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/conf_parser.cpp +++ /dev/null @@ -1,331 +0,0 @@ -#include -#include -#include -#include -#include "logger.h" - -#include "common.h" -#include "conf_parser.h" - -std::string join_string(const std::string &prefix, const std::string &str) { - if (prefix.empty() && str.empty()) { - return ""; - } else if (prefix.empty()) { - return str; - } else if (str.empty()) { - return prefix; - } - - return prefix + str; -} - -bool read_text_file(const std::string &file_name, std::string &str) { - LOG(INFO) << "read_text_file!"; - - if (!file_exist(file_name)) { - LOG(FATAL) << "file: " << file_name << "is not exist!"; - return false; - } - - std::ifstream ifs(file_name.c_str(), std::ios::binary); - if (!ifs) { - LOG(FATAL) << "fail to open " << file_name; - return false; - } - - std::stringstream ss; - ss << ifs.rdbuf(); - str = ss.str(); - - return true; -} - -std::vector split_str(const std::string &str, - const std::string &sep, - bool suppress_blanks) { - std::vector array; - size_t position = 0; - size_t last_position = 0; - - last_position = 
position = 0; - while (position + sep.size() <= str.size()) { - if (str[position] == sep[0] && str.substr(position, sep.size()) == sep) { - if (!suppress_blanks || position - last_position > 0) { - array.push_back(str.substr(last_position, position - last_position)); - } - last_position = position = position + sep.size(); - } else { - position++; - } - } - - if (!suppress_blanks || last_position - str.size()) { - array.push_back(str.substr(last_position)); - } - - return array; -} - -void strip(std::string &s) { - if (s.empty()) { - return; - } - - s.erase(remove_if(s.begin(), s.end(), isspace), s.end()); - - if (s.size() == 1 && - (s[0] == ' ' || s[0] == '\t' || s[0] == '\n' || s[0] == '\r')) { - s = ""; - } - - int begin = -1; - int end = 0; - for (size_t i = 0; i < s.length(); i++) { - if (!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')) { - begin = i; - break; - } - } - - if (begin < 0) { - s = ""; - return; - } - - for (int i = s.length() - 1; i >= 0; i--) { - if (!(s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r')) { - end = i; - break; - } - } - - if (((int)s.size()) != end - begin + 1) { - s = s.substr(begin, end - begin + 1); - } -} - -bool ConfParserBase::load(const std::string &file_name) { - std::string str; - if (!read_text_file(file_name, str)) { - LOG(FATAL) << "fail to read " << file_name; - return false; - } - - load_from_string(str); - - return true; -} - -bool ConfParserBase::load_from_string(const std::string &str) { - map_clear(); - std::vector lines = split_str(str, "\n", true); - - int count = 0; - for (size_t i = 0; i < lines.size(); i++) { - if (parse_line(lines[i])) { - count++; - } - } - - return (count > 0); -} - -bool ConfParserBase::get_conf_float(const std::string &key, - float &value) const { - MapIter it = _map.find(key); - if (it == _map.end()) { - return false; - } - - float temp = 0; - if (!str2num(it->second, temp)) { - LOG(WARNING) << "failure to convert " << it->second << " to float"; - return false; 
- } - - value = temp; - - return true; -} - -bool ConfParserBase::get_conf_uint(const std::string &key, - unsigned int &value) const { - MapIter it = _map.find(key); - if (it == _map.end()) { - LOG(WARNING) << "fail to get: " << key; - return false; - } - - unsigned int temp = 0; - if (!str2num(it->second, temp)) { - LOG(ERROR) << "fail to convert " << it->second << " to float"; - return false; - } - - value = temp; - - return true; -} - -bool ConfParserBase::get_conf_int(const std::string &key, int &value) const { - MapIter it = _map.find(key); - if (it == _map.end()) { - return false; - } - - int temp = 0; - if (!str2num(it->second, temp)) { - LOG(ERROR) << "fail to convert " << it->second << " to float"; - return false; - } - - value = temp; - - return true; -} - -bool ConfParserBase::get_conf_str(const std::string &key, - std::string &value) const { - MapIter it = _map.find(key); - if (it == _map.end()) { - LOG(WARNING) << "fail to get: " << key; - return false; - } else { - value = it->second; - } - - return true; -} - -bool ConfParserBase::exist(const char *name) const { - return _map.find(name) != _map.end(); -} - -void ConfParserBase::map_clear() { _map.clear(); } - -bool ConfParserBase::parse_line(const std::string &line) { - std::string strip_line = line; - strip(strip_line); - if (strip_line.empty() || strip_line[0] == '#' || strip_line[0] == ';') { - return false; - } - - std::basic_string::size_type index_pos = strip_line.find(':'); - if (index_pos == std::string::npos) { - LOG(ERROR) << "wrong setting format of line: " << line; - return false; - } - - std::string key = strip_line.substr(0, index_pos); - std::string value = - strip_line.substr(index_pos + 1, strip_line.size() - index_pos - 1); - if (!_map.insert(std::pair(key, value)).second) { - LOG(WARNING) << "value already exist for key: " << key; - return false; - } - - return true; -} - -ConfParser::~ConfParser() { - if (NULL != _conf) { - delete _conf; - _conf = NULL; - } -} - -bool 
ConfParser::init(const std::string &conf_file) { - _conf = new ConfParserBase(); - if (!_conf->load(conf_file)) { - LOG(FATAL) << "fail to laod conf file: " << conf_file; - return false; - } - - return true; -} - -bool ConfParser::get_uint(const std::string &prefix, - const std::string &key, - unsigned int &value) const { - std::string pre_key = join_string(prefix, key); - if (!_conf->get_conf_uint(pre_key, value)) { - return false; - } - - return true; -} - -bool ConfParser::get_uints(const std::string &prefix, - const std::string &key, - std::vector &values) const { - std::vector str_values; - get_strings(prefix, key, str_values); - - return strs2nums(str_values, values); -} - -bool ConfParser::get_int(const std::string &prefix, - const std::string &key, - int &value) const { - std::string pre_key = join_string(prefix, key); - if (!_conf->get_conf_int(pre_key, value)) { - return false; - } - - return true; -} - -bool ConfParser::get_ints(const std::string &prefix, - const std::string &key, - std::vector &values) const { - std::vector str_values; - get_strings(prefix, key, str_values); - - return strs2nums(str_values, values); -} - -bool ConfParser::get_float(const std::string &prefix, - const std::string &key, - float &value) const { - std::string pre_key = join_string(prefix, key); - if (!_conf->get_conf_float(pre_key, value)) { - return false; - } - - return true; -} - -bool ConfParser::get_floats(const std::string &prefix, - const std::string &key, - std::vector &values) const { - std::vector str_values; - get_strings(prefix, key, str_values); - - return strs2nums(str_values, values); -} - -bool ConfParser::get_string(const std::string &prefix, - const std::string &key, - std::string &value) const { - std::string pre_key = join_string(prefix, key); - if (!_conf->get_conf_str(pre_key, value)) { - return false; - } - - return true; -} - -bool ConfParser::get_strings(const std::string &prefix, - const std::string &key, - std::vector &values) const { - std::string 
pre_key = join_string(prefix, key); - std::string value; - if (!_conf->get_conf_str(pre_key, value)) { - return false; - } - - std::vector split_value = split_str(value, ",", true); - values.swap(split_value); - - return true; -} diff --git a/PaddleCV/Research/landmark/pypredict/conf_parser.h b/PaddleCV/Research/landmark/pypredict/conf_parser.h deleted file mode 100644 index db828c04da698bc6cc9de1be6f6d16d1dce1fb3b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/conf_parser.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include -#include - -typedef std::map Map; -typedef Map::const_iterator MapIter; - -class ConfParserBase { - public: - ConfParserBase() {} - - bool load(const std::string &file_name); - - bool load_from_string(const std::string &str); - - bool get_conf_float(const std::string &key, float &value) const; - - bool get_conf_uint(const std::string &key, unsigned int &value) const; - - bool get_conf_int(const std::string &key, int &value) const; - - bool get_conf_str(const std::string &key, std::string &value) const; - - bool exist(const char *name) const; - - void map_clear(); - - private: - bool parse_line(const std::string &line); - - Map _map; -}; - -class ConfParser { - public: - ConfParser() : _conf(NULL){}; - ~ConfParser(); - - bool init(const std::string &conf_file); - - bool get_uint(const std::string &prefix, - const std::string &key, - unsigned int &value) const; - - bool get_uints(const std::string &prefix, - const std::string &key, - std::vector &values) const; - - bool get_int(const std::string &prefix, - const std::string &key, - int &value) const; - - bool get_ints(const std::string &prefix, - const std::string &key, - std::vector &values) const; - - bool get_float(const std::string &prefix, - const std::string &key, - float &value) const; - - bool get_floats(const std::string &prefix, - const std::string &key, - std::vector &values) const; - - bool get_string(const std::string &prefix, - const 
std::string &key, - std::string &value) const; - - bool get_strings(const std::string &prefix, - const std::string &key, - std::vector &values) const; - - public: - ConfParserBase *_conf; -}; diff --git a/PaddleCV/Research/landmark/pypredict/logger.h b/PaddleCV/Research/landmark/pypredict/logger.h deleted file mode 100644 index d0ba1c75deefc6f3c9732f9857145bcaf5e4bae7..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/logger.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include -#include - -// compatiable with glog -enum { - INFO = 0, - WARNING = 1, - ERROR = 2, - FATAL = 3, -}; - -struct NullStream : std::ostream { - NullStream() : std::ios(0), std::ostream(0) {} -}; - -class Logger { - public: - Logger(const char *filename, int lineno, int loglevel) { - static const char *log_levels[] = {"INFO ", "WARN ", "ERROR", "FATAL"}; - - static NullStream nullstream; - _loglevel = loglevel; - _logstream = (_loglevel >= getloglevel()) ? &std::cerr : &nullstream; - (*_logstream) << log_levels[_loglevel] << ":" << filename << "[" << lineno - << "]"; - } - static inline int &getloglevel() { - // default initialized with glog env - static int globallevel = getgloglevel(); - return globallevel; - } - static inline void setloglevel(int loglevel) { getloglevel() = loglevel; } - static int getgloglevel() { - char *env = getenv("GLOG_minloglevel"); - int level = WARNING; - if (env != NULL) { - int num = 0; - std::istringstream istr(env); - istr >> num; - if (!istr.fail()) { - level = num; - } - } - return level; - } - ~Logger() { *_logstream << std::endl; } - std::ostream &getstream() { return *_logstream; } - - protected: - int _loglevel; - std::ostream *_logstream; -}; - -#define LOG(loglevel) Logger(__FILE__, __LINE__, loglevel).getstream() diff --git a/PaddleCV/Research/landmark/pypredict/predictor.cpp b/PaddleCV/Research/landmark/pypredict/predictor.cpp deleted file mode 100644 index 
c27ca464213867f67e4f039672446d2367821b76..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/predictor.cpp +++ /dev/null @@ -1,478 +0,0 @@ -#include -#include -#include "logger.h" - -#include "conf_parser.h" -#include "predictor.h" - -Predictor::~Predictor() {} - -bool feed(paddle::PaddlePredictor *predictor, - const std::vector &tensors) { - LOG(INFO) << "Predictor::feed"; - - auto names = predictor->GetInputNames(); - if (names.size() != tensors.size()) { - LOG(WARNING) << "The given size " << tensors.size() - << " is not equal to the required size " << names.size(); - return false; - } - - for (size_t i = 0; i < names.size(); ++i) { - auto i_t = predictor->GetInputTensor(names[i]); - i_t->Reshape(tensors[i].shape); - i_t->SetLoD(tensors[i].lod); - - if (tensors[i].data.type == DataType::FLOAT32) { - const float *temp = - reinterpret_cast(tensors[i].data.data.get()); - i_t->copy_from_cpu(temp); - } else if (tensors[i].data.type == DataType::INT32) { - const int32_t *temp = - reinterpret_cast(tensors[i].data.data.get()); - i_t->copy_from_cpu(temp); - } else if (tensors[i].data.type == DataType::INT64) { - const int64_t *temp = - reinterpret_cast(tensors[i].data.data.get()); - i_t->copy_from_cpu(temp); - } else { - LOG(ERROR) << "do not support current datatype"; - return false; - } - } - - return true; -} - -bool fetch(paddle::PaddlePredictor *predictor, std::vector &tensors) { - LOG(INFO) << "Predictor::fetch"; - - auto names = predictor->GetOutputNames(); - for (auto &name : names) { - auto o_t = predictor->GetOutputTensor(name); - std::vector s = o_t->shape(); - - Tensor out; - out.shape = s; - out.lod = o_t->lod(); - - int num = std::accumulate(s.begin(), s.end(), 1, std::multiplies()); - - if (o_t->type() == paddle::PaddleDType::FLOAT32) { - out.data = DataBuf(DataType::FLOAT32, size_t(num)); - float *p_data = reinterpret_cast(out.data.data.get()); - o_t->copy_to_cpu(p_data); - } else if (o_t->type() == 
paddle::PaddleDType::INT32) { - out.data = DataBuf(DataType::INT32, size_t(num)); - int32_t *p_data = reinterpret_cast(out.data.data.get()); - o_t->copy_to_cpu(p_data); - } else if (o_t->type() == paddle::PaddleDType::INT64) { - out.data = DataBuf(DataType::INT64, size_t(num)); - int64_t *p_data = reinterpret_cast(out.data.data.get()); - o_t->copy_to_cpu(p_data); - } else { - LOG(ERROR) << "do no support current datatype"; - return false; - } - - tensors.push_back(out); - } - - return true; -} - -bool Predictor::predict(const std::vector &inputs, - const std::vector &layers, - std::vector &outputs) { - LOG(INFO) << "Predictor::predict"; - (void)layers; - // 1. feed input - if (!feed(_predictor.get(), inputs)) { - return false; - } - - // 2. execute inference - if (!_predictor->ZeroCopyRun()) { - LOG(WARNING) << "fail to execute predictor"; - return false; - } - - // 3. fetch output - if (!fetch(_predictor.get(), outputs)) { - return false; - } - return true; -} - -bool check_shape(const std::vector> &datas, - const std::vector> &shapes) { - LOG(INFO) << "check_shape"; - if (datas.size() != shapes.size()) { - LOG(ERROR) << "datas size: " << datas.size() << " != " - << "shapes size(): " << shapes.size(); - return false; - } - for (size_t i = 0; i < datas.size(); ++i) { - int count = 1; - for (auto num : shapes[i]) { - count *= num; - } - int data_size = static_cast(datas[i].size()); - if (count != data_size) { - LOG(ERROR) << "data[" << i << "] size " << data_size << " != " - << "shape [" << i << "] size " << count; - return false; - } - } - return true; -} - -bool feed(paddle::PaddlePredictor *predictor, - const std::vector> &datas, - const std::vector> &shapes) { - LOG(INFO) << "Predictor::feed"; - - // 1. check input shape - if (!check_shape(datas, shapes)) { - return false; - } - - // 2. 
check given input and required input - auto names = predictor->GetInputNames(); - if (names.size() != datas.size()) { - LOG(WARNING) << "The given size " << datas.size() - << " is not equal to the required size " << names.size(); - return false; - } - - // 3. feed - for (size_t i = 0; i < names.size(); ++i) { - auto i_t = predictor->GetInputTensor(names[i]); - i_t->Reshape(shapes[i]); - i_t->copy_from_cpu(datas[i].data()); - } - - return true; -} - -bool fetch(paddle::PaddlePredictor *predictor, - std::vector> &datas, - std::vector> &shapes) { - LOG(INFO) << "Predictor::fetch"; - - auto names = predictor->GetOutputNames(); - for (auto &name : names) { - auto o_t = predictor->GetOutputTensor(name); - std::vector s = o_t->shape(); - shapes.push_back(s); - - int num = std::accumulate(s.begin(), s.end(), 1, std::multiplies()); - - std::vector data(num); - o_t->copy_to_cpu(data.data()); - datas.push_back(data); - } - - return true; -} - -bool Predictor::predict(const std::vector> &input_datas, - const std::vector> &input_shapes, - const std::vector &layers, - std::vector> &output_datas, - std::vector> &output_shapes) { - LOG(INFO) << "Predictor::predict"; - (void)layers; - - // 1. feed input - if (!feed(_predictor.get(), input_datas, input_shapes)) { - return false; - } - - // 2. execute inference - if (!_predictor->ZeroCopyRun()) { - LOG(WARNING) << "fail to execute predictor"; - return false; - } - - // 3. fetch output - if (!fetch(_predictor.get(), output_datas, output_shapes)) { - return false; - } - - return true; -} - -void init_tensorrt(const ConfParser *conf, - const std::string &prefix, - AnalysisConfig &config) { - LOG(INFO) << "Predictor::init_tensorrt()"; - - // 1. max_batch_size for tensorrt - int max_batch_size = 1; - if (!conf->get_int(prefix, "max_batch_size", max_batch_size)) { - LOG(WARNING) << "fail to get max_batch_size from conf, set as 1"; - } - max_batch_size = std::max(1, max_batch_size); - - // 2. 
workspace_size for tensorrt - int workspace_size = 0; - if (!conf->get_int(prefix, "workspace_size", workspace_size)) { - LOG(WARNING) << "fail to get workspace_size from conf, set as 0"; - } - workspace_size = std::max(0, workspace_size); - - // 3. min_subgraph_size for tensorrt - int min_subgraph_size = 3; - if (!conf->get_int(prefix, "min_subgraph_size", min_subgraph_size)) { - LOG(WARNING) << "fail to get min_subgraph_size from conf, set as 3"; - } - min_subgraph_size = std::max(0, min_subgraph_size); - - config.EnableTensorRtEngine( - workspace_size, max_batch_size, min_subgraph_size); -} - -void init_anakin(const ConfParser *conf, - const std::string &prefix, - AnalysisConfig &config) { - LOG(INFO) << "Predictor::init_anakin()"; - - // 1. max_batch_size for tensorrt - int max_batch_size = 1; - if (!conf->get_int(prefix, "max_batch_size", max_batch_size)) { - LOG(WARNING) << "fail to get max_batch_size from conf, set as 1"; - } - max_batch_size = std::max(1, max_batch_size); - - std::map> anakin_max_input_dict; - std::vector input_names; - if (!conf->get_strings(prefix, "input_names", input_names)) { - LOG(WARNING) << "fail to get input_names from conf"; - } - for (auto &n : input_names) { - std::vector shape; - if (!conf->get_ints(prefix, n, shape)) { - LOG(WARNING) << "fail to get the shape of " + n; - } else { - anakin_max_input_dict[n] = shape; - } - } - - config.EnableAnakinEngine(max_batch_size, anakin_max_input_dict); - config.pass_builder()->TurnOnDebug(); -} - -void init_gpu(const ConfParser *conf, - const std::string &prefix, - int device, - AnalysisConfig &config) { - LOG(INFO) << "Predictor::init_gpu()"; - - // 1. GPU memeroy - uint32_t gpu_memory_mb = 1024; - if (!conf->get_uint(prefix, "gpu_memory_mb", gpu_memory_mb)) { - LOG(WARNING) << "fail to get gpu_memory_mb from conf, set as 1024"; - } - config.EnableUseGpu(gpu_memory_mb, device); - - // 2. 
use_tensorrt - std::string infer_engine; - if (!conf->get_string(prefix, "infer_engine", infer_engine)) { - LOG(WARNING) << "disable infer engine"; - return; - } else if (infer_engine == "tensorrt") { - init_tensorrt(conf, prefix + "tensorrt_", config); - } else if (infer_engine == "anakin") { - init_anakin(conf, prefix + "anakin_", config); - } else { - LOG(WARNING) << "unknwon infer engine"; - return; - } -} - -void init_cpu(const ConfParser *conf, - const std::string &prefix, - AnalysisConfig &config) { - LOG(INFO) << "Predictor::init_cpu()"; - - config.DisableGpu(); - - // 1. cpu_math_library (such as mkl/openblas) num_threads - int num_threads = 1; - if (!conf->get_int(prefix, "num_threads", num_threads)) { - LOG(WARNING) << "fail to get num_threads conf, set as 1"; - } - num_threads = std::max(1, num_threads); - config.SetCpuMathLibraryNumThreads(num_threads); - - // 2. use_mkldnn - int use_mkldnn = -1; - if (conf->get_int(prefix, "use_mkldnn", use_mkldnn) && use_mkldnn > 0) { - config.EnableMKLDNN(); - } -} - -bool init_model(const ConfParser *conf, - const std::string &prefix, - AnalysisConfig &config) { - LOG(INFO) << "Predictor::init_model()"; - - std::string prog_file; - if (!conf->get_string(prefix, "prog_file", prog_file)) { - LOG(WARNING) << "fail to get prog_file from conf"; - } - - std::string param_file; - if (!conf->get_string(prefix, "param_file", param_file)) { - LOG(WARNING) << "fail to get param_file from conf"; - } - - if (!prog_file.empty() && !param_file.empty()) { - if (!file_exist(prog_file)) { - LOG(FATAL) << "file: " << prog_file << " is not exist"; - return false; - } - if (!file_exist(param_file)) { - LOG(FATAL) << "file: " << param_file << " is not exist"; - return false; - } - config.SetModel(prog_file, param_file); - return true; - } - - std::string model_path; - if (!conf->get_string(prefix, "model_path", model_path)) { - LOG(FATAL) << "fail to get model_path from conf"; - return false; - } - config.SetModel(model_path); - - 
return true; -} - -void show_version_info() { - static bool initialized = false; - if (initialized) { - return; - } - - LOG(INFO) << "[date:" << __DATE__ << "]" - << "[time:" << __TIME__ << "]"; - LOG(INFO) << "paddle " << paddle::get_version(); - - initialized = true; -} - -bool Predictor::init(const std::string &conf_file, const std::string &prefix) { - LOG(INFO) << "Predictor::init()"; - - show_version_info(); - - std::unique_ptr config(new AnalysisConfig()); - - std::unique_ptr conf(new ConfParser()); - if (!conf->init(conf_file)) { - LOG(FATAL) << "fail to load conf file: " << conf_file; - return false; - } - - // 1. Debug - if (!conf->get_int(prefix, "debug", _debug)) { - _debug = -1; - LOG(WARNING) << "fail to get debug from conf, set as -1"; - } - - // 2. init model - if (!init_model(conf.get(), prefix, *config.get())) { - LOG(FATAL) << "fail to init model"; - return false; - } - - // 3. enable_ir_optim - int ir_optim = -1; - if (!conf->get_int(prefix, "enable_ir_optim", ir_optim)) { - LOG(WARNING) << "fail to get enable_ir_optim from conf, set as false"; - } - config->SwitchIrOptim(ir_optim > 0); - - // 4. specify_input_name - int sp_input = -1; - if (!conf->get_int(prefix, "specify_input_name", sp_input)) { - LOG(WARNING) << "fail to get specify_input_name from conf, set as false"; - } - config->SwitchSpecifyInputNames(sp_input > 0); - - // 5. use zerocopy - config->SwitchUseFeedFetchOps(false); - - // 6. Device - int device = -1; - if (!conf->get_int(prefix, "device", device)) { - LOG(WARNING) << "fail to get device from conf"; - return false; - } - if (device < 0) { - LOG(INFO) << "use cpu!"; - init_cpu(conf.get(), prefix, *config.get()); - } else { - LOG(INFO) << "use gpu!"; - init_gpu(conf.get(), prefix, device, *config.get()); - } - - // 7. 
delete unused pass - std::vector passes; - if (conf->get_strings(prefix, "delete_pass", passes)) { - for (auto &p : passes) { - LOG(INFO) << "delete pass: " << p; - config->pass_builder()->DeletePass(p); - } - } - - // 8. create predictor - auto predictor = CreatePaddlePredictor(*config.get()); - if (NULL == predictor) { - LOG(ERROR) << "fail to create paddle predictor"; - return false; - } - _predictor = std::move(predictor); - - return true; -} - -bool Predictor::init_shared(Predictor *cls) { - LOG(INFO) << "Predictor::init_shared"; - - this->_predictor = std::move(cls->_predictor->Clone()); - if (NULL == this->_predictor) { - LOG(ERROR) << "fail to clone paddle predictor"; - return false; - } - - return true; -} - -ICNNPredict *Predictor::clone() { - LOG(INFO) << "Predictor::clone"; - Predictor *cls = new Predictor(); - - if (!cls->init_shared(this)) { - LOG(FATAL) << "fail to call cls->init_shared"; - delete cls; - return NULL; - } - return cls; -} - -ICNNPredict *create_cnnpredict(const std::string &conf_file, - const std::string &prefix) { - LOG(INFO) << "create_cnnpredict"; - Predictor *predictor = new Predictor(); - - if (!predictor->init(conf_file, prefix)) { - delete predictor; - return NULL; - } - - return predictor; -} diff --git a/PaddleCV/Research/landmark/pypredict/predictor.h b/PaddleCV/Research/landmark/pypredict/predictor.h deleted file mode 100644 index 3628b5ac388199218ff9e2a876f8224dcb609c08..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/predictor.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include -#include -#include "cnnpredict_interface.h" -#include "common.h" -#include "paddle_inference_api.h" - -using paddle::CreatePaddlePredictor; -using paddle::AnalysisConfig; -using paddle::PaddleEngineKind; - -class Predictor : public ICNNPredict { - public: - Predictor() : _debug(0) {} - - virtual ~Predictor(); - - ICNNPredict *clone(); - - /** - * [init predict from conf] - * @param conf_file [conf file] - 
* @param prefix [prefix before every key] - * @return [true of fasle] - */ - bool init(const std::string &conf_file, const std::string &prefix); - - bool predict(const std::vector &inputs, - const std::vector &layers, - std::vector &outputs); - - bool predict(const std::vector> &input_datas, - const std::vector> &input_shapes, - const std::vector &layers, - std::vector> &output_datas, - std::vector> &output_shapes); - - private: - bool init_shared(Predictor *cls); - - int _debug; - std::unique_ptr _predictor; -}; diff --git a/PaddleCV/Research/landmark/pypredict/py_cnnpredict.cpp b/PaddleCV/Research/landmark/pypredict/py_cnnpredict.cpp deleted file mode 100644 index 6618ed8a6ffbde9372cb9e82abe3dd23e82d2c60..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/py_cnnpredict.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "logger.h" - -#include "cnnpredict_interface.h" -#include "common.h" -#include "py_cnnpredict.h" - -template -vector ndarray_to_vector(const py::array &nd) { - py::dtype datatype = nd.dtype(); - size_t nd_dim = nd.ndim(); - const auto *shape = nd.shape(); - size_t data_num = nd.size(); - // py::buffer_info buf = nd.request(); - - const T *nd_data = reinterpret_cast(nd.data(0)); - vector vec(data_num, 0); - std::copy(nd_data, nd_data + data_num, vec.begin()); - return vec; -} - -template -vector list_to_vector(py::list &list) { - vector vec; - for (size_t i = 0; i < py::len(list); i++) { - T l = py::cast(list[i]); - vec.push_back(l); - } - - return vec; -} - -template -vector> ndlist_to_vectors(py::list &ndlist) { - vector> vecs; - for (unsigned int i = 0; i < py::len(ndlist); i++) { - py::array nd = py::array(ndlist[i]); - vector vec = ndarray_to_vector(nd); - vecs.push_back(vec); - } - return vecs; -} - -template -py::array vector_to_ndarray(const vector &vec) { - const std::vector shape = {vec.size()}; - auto format = py::format_descriptor::format(); - py::dtype dt(format); - py::array nd(dt, shape, (const char 
*)vec.data()); - return nd; -} - -template -py::list vectors_to_list(const vector> &vecs) { - py::list ndlist; - for (int i = 0; i < vecs.size(); i++) { - py::array nd = vector_to_ndarray(vecs[i]); - ndlist.append(nd); - } - return ndlist; -} - -PyCNNPredict::~PyCNNPredict() { - if (_predictor != NULL) { - delete _predictor; - _predictor = NULL; - } -} - -bool PyCNNPredict::init(string conf_file, string prefix) { - LOG(INFO) << "PyCNNPredict::init()"; - _predictor = create_cnnpredict(conf_file, prefix); - if (_predictor == NULL) { - LOG(FATAL) << "fail to call create_cnnpredict"; - return false; - } - return true; -} - -py::list PyCNNPredict::postprocess(const vector> &vdatas, - const vector> &vshapes) { - LOG(INFO) << "PyCNNPredict::postprocess()"; - - py::list result; - if (vdatas.size() != vshapes.size()) { - LOG(FATAL) << "datas and shapes size not equal"; - return result; - } - - result.append(vectors_to_list(vdatas)); - result.append(vectors_to_list(vshapes)); - - return result; -} - -py::list PyCNNPredict::predict(py::list input_datas, - py::list input_shapes, - py::list layer_names) { - LOG(INFO) << "PyCNNPredict::predict()"; - vector> inputdatas; - vector> inputshapes; - vector layernames; - vector> outputdatas; - vector> outputshapes; - - py::list result; - if (py::len(input_datas) != py::len(input_shapes)) { - LOG(FATAL) << "datas and shapes size not equal"; - return result; - } - - inputdatas = ndlist_to_vectors(input_datas); - inputshapes = ndlist_to_vectors(input_shapes); - layernames = list_to_vector(layer_names); - - bool ret = _predictor->predict( - inputdatas, inputshapes, layernames, outputdatas, outputshapes); - if (!ret) { - LOG(FATAL) << "fail to predict"; - return result; - } - - return postprocess(outputdatas, outputshapes); -} - -PYBIND11_MODULE(PyCNNPredict, m) { - m.doc() = "pycnnpredict"; - py::class_(m, "PyCNNPredict") - .def(py::init()) - .def("init", &PyCNNPredict::init) - .def("predict", &PyCNNPredict::predict); -} diff --git 
a/PaddleCV/Research/landmark/pypredict/py_cnnpredict.h b/PaddleCV/Research/landmark/pypredict/py_cnnpredict.h deleted file mode 100644 index 5070fe5d1043e45ff4defa694ae93d6746013060..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/landmark/pypredict/py_cnnpredict.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -namespace py = pybind11; -using std::string; -using std::vector; - -class PyCNNPredict { - public: - PyCNNPredict() : _predictor(NULL) {} - - ~PyCNNPredict(); - - bool init(string conf_file, string prefix); - - py::list predict(py::list input_datas, - py::list input_shapes, - py::list layer_names); - - private: - ICNNPredict *_predictor; - py::list postprocess(const vector> &vdatas, - const vector> &vshapes); -}; diff --git a/PaddleCV/Research/webvision2018/README.md b/PaddleCV/Research/webvision2018/README.md deleted file mode 100644 index 7c1818f4a1694ac4d2890a675f65f6fb5fc2c800..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# WebVision Image Classification 2018 Challenge -The goal of this challenge is to advance the area of learning knowledge and representation from web data. The web data not only contains huge numbers of visual images, but also rich meta information concerning these visual data, which could be exploited to learn good representations and models. -More detail [[WebVision2018](https://www.vision.ee.ethz.ch/webvision/challenge.html)]. - -By observing the web data, we find that there are five key challenges, i.e., imbalanced class sizes, high intra-classes diversity and inter-class similarity, imprecise instances, -insufficient representative instances, and ambiguous class labels. 
To alleviate these challenges, we assume that every training instance has -the potential to contribute positively by alleviating the data bias and noise via reweighting the influence of each instance according to different -class sizes, large instance clusters, its confidence, small instance bags and the labels. In this manner, the influence of bias and noise in the -web data can be gradually alleviated, leading to the steadily improving performance of URNet. Experimental results in the WebVision 2018 -challenge with 16 million noisy training images from 5000 classes show that our approach outperforms state-of-the-art models and ranks the first -place in the image classification task. The detail of our solution can refer to our paper[[URNet](https://arxiv.org/abs/1811.00700)]. - -## 1.Prepare data -We have provided a download + preprocess script of valset data. -``` -cd data -sh download_webvision2018.sh -``` -Note that the server hosting Webvision Data reboots every day at midnight (Zurich time). You might want to change wget to something else. - -## 2.Environment installation -Cudnn >= 7, CUDA 8/9, PaddlePaddle version >= 1.3, python version 2.7 (More detail [[PaddlePaddle](https://github.com/paddlepaddle/paddle)]) - -## 3.Download pretrained model -| Model | Acc@1 | Acc@5 -| - | - | - -| [ResNeXt101_32x4d](https://paddlemodels.bj.bcebos.com/webvision/ResNeXt101_32x4d_Released.tar.gz) | 53.4% | 77.1% - -## 4.Test image -``` -sh run.sh -``` -or -``` -export CUDA_VISIBLE_DEVICES=$GPU_ID -export FLAGS_fraction_of_gpu_memory_to_use=1.0 -python infer.py --model ResNeXt101_32x4d \ - --pretrained_model $PRETRAINEDMODELPATH \ - --class_dim 5000 \ - --img_path $IMGPATH \ - --img_list $IMGLIST \ - --use_gpu True -``` - -You will get the predictions of images. 
-## 5.Evaluation -``` -export CUDA_VISIBLE_DEVICES=$GPU_ID -export FLAGS_fraction_of_gpu_memory_to_use=1.0 -python eval.py --model ResNeXt101_32x4d \ - --pretrained_model $PRETRAINEDMODELPATH \ - --class_dim 5000 \ - --img_path $IMGPATH \ - --img_list $IMGLIST \ - --use_gpu True - -``` -You will get the Acc@1 and Acc@5. - diff --git a/PaddleCV/Research/webvision2018/data/download_webvision2018.sh b/PaddleCV/Research/webvision2018/data/download_webvision2018.sh deleted file mode 100644 index fa09f78c6790b255fd303962e2762cc03616fe23..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/data/download_webvision2018.sh +++ /dev/null @@ -1,6 +0,0 @@ -wget https://data.vision.ee.ethz.ch/cvl/webvision2018/val_images_resized.tar -tar -xvf val_images_resized.tar -rm val_images_resized.tar -wget https://data.vision.ee.ethz.ch/cvl/webvision2018/val_filelist.txt -mv val_images_resized val -mv val_filelist.txt val_list.txt diff --git a/PaddleCV/Research/webvision2018/eval.py b/PaddleCV/Research/webvision2018/eval.py deleted file mode 100644 index ff8b9a8a33a28ec9c9f7ffbe3884523700d3a52b..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/eval.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import numpy as np -import time -import sys -import paddle -import paddle.fluid as fluid -import models -import reader -import argparse -import functools -from utils import add_arguments, print_arguments, accuracy -import math -import sys -reload(sys) -sys.setdefaultencoding('utf-8') - -parser = argparse.ArgumentParser(description=__doc__) -# yapf: disable -add_arg = functools.partial(add_arguments, argparser=parser) -add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('use_gpu', bool, True, "Whether to use GPU or not.") -add_arg('class_dim', int, 5000, "Class number.") -add_arg('image_shape', str, "3,224,224", "Input image size") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('model', str, "ResNeXt101_32x4d", "Set the network to use.") -add_arg('img_list', str, "None", "list of valset.") -add_arg('img_path', str, "NOne", "path of valset.") -# yapf: enable - -model_list = [m for m in dir(models) if "__" not in m] - - -def eval(args): - # parameters from arguments - class_dim = args.class_dim - model_name = args.model - pretrained_model = args.pretrained_model - image_shape = [int(m) for m in args.image_shape.split(",")] - - assert model_name in model_list, "{} is not in lists: {}".format(args.model, - model_list) - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - - # model definition - model = models.__dict__[model_name]() - - if model_name is "GoogleNet": - out, _, _ = model.net(input=image, class_dim=class_dim) - else: - out = model.net(input=image, class_dim=class_dim) - - test_program = fluid.default_main_program().clone(for_test=True) - - fetch_list = [out.name] - - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - if pretrained_model: - - def if_exist(var): - return os.path.exists(os.path.join(pretrained_model, var.name)) - - fluid.io.load_vars(exe, pretrained_model, 
predicate=if_exist) - - - test_batch_size = args.batch_size - - img_size = image_shape[1] - test_reader = paddle.batch(reader.test(args, img_size), batch_size=test_batch_size) - feeder = fluid.DataFeeder(place=place, feed_list=[image]) - - targets = [] - with open(args.img_list, 'r') as f: - for line in f.readlines(): - targets.append(line.strip().split()[-1]) - targets = np.array(targets, dtype=np.int) - - preds = [] - TOPK = 5 - - for batch_id, data in enumerate(test_reader()): - all_result = exe.run(test_program, - fetch_list=fetch_list, - feed=feeder.feed(data)) - pred_label = np.argsort(-all_result[0], 1)[:,:5] - print("Test-{0}".format(batch_id)) - preds.append(pred_label) - preds = np.vstack(preds) - top1, top5 = accuracy(targets, preds) - print("top1:{:.4f} top5:{:.4f}".format(top1,top5)) - -def main(): - args = parser.parse_args() - print_arguments(args) - eval(args) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/Research/webvision2018/infer.py b/PaddleCV/Research/webvision2018/infer.py deleted file mode 100755 index 7f5448314320c21c363dbd2fd10ca990a6a1dc0f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/infer.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import time -import sys -import math -import numpy as np -import argparse -import functools - -import paddle -import paddle.fluid as fluid -import reader -import models -import utils -from utils.utility import add_arguments,print_arguments - - -parser = argparse.ArgumentParser(description=__doc__) -# yapf: disable -add_arg = functools.partial(add_arguments, argparser=parser) -add_arg('use_gpu', bool, True, "Whether to use GPU or not.") -add_arg('class_dim', int, 5000, "Class number.") -add_arg('image_shape', str, "3,224,224", "Input image size") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('model', str, "ResNeXt101_32x4d", "Set the network to use.") -add_arg('save_inference', bool, False, "Whether to save inference model or not") -add_arg('resize_short_size', int, 256, "Set resize short size") -add_arg('img_list', str, None, "list of valset") -add_arg('img_path', str, None, "path of valset") -# yapf: enable - -def infer(args): - # parameters from arguments - class_dim = args.class_dim - model_name = args.model - save_inference = args.save_inference - pretrained_model = args.pretrained_model - image_shape = [int(m) for m in args.image_shape.split(",")] - model_list = [m for m in dir(models) if "__" not in m] - assert model_name in model_list, "{} is not in lists: {}".format(args.model, - model_list) - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - - # model definition - model = models.__dict__[model_name]() - if model_name == "GoogleNet": - out, _, _ = model.net(input=image, class_dim=class_dim) - else: - out = model.net(input=image, class_dim=class_dim) - - test_program = fluid.default_main_program().clone(for_test=True) - - fetch_list = [out.name] - - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - 
exe.run(fluid.default_startup_program()) - - fluid.io.load_persistables(exe, pretrained_model) - if save_inference: - fluid.io.save_inference_model( - dirname=model_name, - feeded_var_names=['image'], - main_program=test_program, - target_vars=out, - executor=exe, - model_filename='model', - params_filename='params') - print("model: ",model_name," is already saved") - exit(0) - test_batch_size = 1 - img_size = image_shape[1] - test_reader = paddle.batch(reader.test(args, img_size), batch_size=test_batch_size) - feeder = fluid.DataFeeder(place=place, feed_list=[image]) - - TOPK = 1 - for batch_id, data in enumerate(test_reader()): - result = exe.run(test_program, - fetch_list=fetch_list, - feed=feeder.feed(data)) - - result = result[0][0] - pred_label = np.argsort(result)[::-1][:TOPK] - print("Test-{0}-score: {1}, class {2}" - .format(batch_id, result[pred_label], pred_label)) - sys.stdout.flush() - - -def main(): - args = parser.parse_args() - print_arguments(args) - infer(args) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/Research/webvision2018/models/__init__.py b/PaddleCV/Research/webvision2018/models/__init__.py deleted file mode 100644 index 56e5f333d0d218cfa8401331cce3d0be21701dff..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/models/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .resnext_32x4d import ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d diff --git a/PaddleCV/Research/webvision2018/models/resnext_32x4d.py b/PaddleCV/Research/webvision2018/models/resnext_32x4d.py deleted file mode 100644 index 60d7b3cd0d674319ce908215ee73daaef19756aa..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/models/resnext_32x4d.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -import math -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNeXt", "ResNeXt50_32x4d", "ResNeXt101_32x4d", "ResNeXt152_32x4d"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNeXt(): - def __init__(self, layers=50): - self.params = train_parameters - self.layers = layers - - def net(self, input, class_dim=1000): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [256, 512, 1024, 2048] - cardinality = 32 - - conv = self.conv_bn_layer( - input=input, - num_filters=64, - filter_size=7, - stride=2, - act='relu', - name="res_conv1") - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - 
conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - name=conv_name) - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - return out - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', ) - - def shortcut(self, input, ch_out, stride, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, cardinality, name): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - act='relu', - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - groups=cardinality, - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - 
num_filters=num_filters, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, num_filters, stride, name=name + "_branch1") - - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") - - -def ResNeXt50_32x4d(): - model = ResNeXt(layers=50) - return model - - -def ResNeXt101_32x4d(): - model = ResNeXt(layers=101) - return model - - -def ResNeXt152_32x4d(): - model = ResNeXt(layers=152) - return model diff --git a/PaddleCV/Research/webvision2018/reader.py b/PaddleCV/Research/webvision2018/reader.py deleted file mode 100644 index 06a11399bc67ba3bd93da5fb6cffdb19f8505840..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/reader.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import math -import random -import functools -import numpy as np -import paddle -import cv2 -import io - -random.seed(0) -np.random.seed(0) - -THREAD = 8 -BUF_SIZE = 128 - -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - -def rotate_image(img): - """ rotate_image """ - (h, w) = img.shape[:2] - center = (w / 2, h / 2) - angle = np.random.randint(-10, 11) - M = cv2.getRotationMatrix2D(center, angle, 1.0) - rotated = cv2.warpAffine(img, M, (w, h)) - return rotated - -def random_crop(img, size, scale=None, ratio=None): - """ random_crop """ - scale = [0.08, 1.0] if scale is None else scale - ratio = [3. / 4., 4. / 3.] if ratio is None else ratio - - aspect_ratio = math.sqrt(np.random.uniform(*ratio)) - w = 1. * aspect_ratio - h = 1. / aspect_ratio - - bound = min((float(img.shape[1]) / img.shape[0]) / (w ** 2), - (float(img.shape[0]) / img.shape[1]) / (h ** 2)) - scale_max = min(scale[1], bound) - scale_min = min(scale[0], bound) - - target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min, - scale_max) - target_size = math.sqrt(target_area) - w = int(target_size * w) - h = int(target_size * h) - - i = np.random.randint(0, img.size[0] - w + 1) - j = np.random.randint(0, img.size[1] - h + 1) - - img = img[i:i+h, j:j+w, :] - resized = cv2.resize(img, (size, size), - interpolation=cv2.INTER_CUBIC - ) - return resized - -def distort_color(img): - return img - -def resize_short(img, target_size): - """ resize_short """ - percent = float(target_size) / min(img.shape[0], img.shape[1]) - resized_width = int(round(img.shape[1] * percent)) - resized_height = int(round(img.shape[0] * percent)) - resized = cv2.resize(img, (resized_width, resized_height), - interpolation=cv2.INTER_CUBIC - ) - return resized - -def crop_image(img, target_size, center): - """ crop_image """ - height, width = img.shape[:2] - size = target_size - if center == True: - w_start = (width - size) / 2 - 
h_start = (height - size) / 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img[h_start:h_end, w_start:w_end, :] - return img - -def process_image(sample, mode, color_jitter, rotate, - crop_size=224, mean=None, std=None): - """ process_image """ - - mean = [0.485, 0.456, 0.406] if mean is None else mean - std = [0.229, 0.224, 0.225] if std is None else std - - - img_path = sample[0] - img = cv2.imread(img_path) - img = cv2.resize(img, (crop_size, crop_size)) - - img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 - img_mean = np.array(mean).reshape((3, 1, 1)) - img_std = np.array(std).reshape((3, 1, 1)) - img -= img_mean - img /= img_std - - return (img, ) - -def image_mapper(**kwargs): - """ image_mapper """ - return functools.partial(process_image, **kwargs) - -def _reader_creator(file_list, - mode, - shuffle=False, - color_jitter=False, - rotate=False, - data_dir=None, - crop_size=224): - def reader(): - - with open(file_list) as flist: - full_lines = [line.strip() for line in flist] - if shuffle: - np.random.shuffle(lines) - lines = full_lines - for line in lines: - img_path, label = line.strip().split() - img_path = os.path.join(data_dir, img_path) - yield [img_path] - - - image_mapper = functools.partial(process_image, - mode=mode, color_jitter=color_jitter, rotate=rotate, crop_size=crop_size) - reader = paddle.reader.xmap_readers( - image_mapper, reader, THREAD, BUF_SIZE, order=True) - return reader - -def create_img_reader(args): - def reader(): - img_path = args.img_path - yield [img_path] - return reader - -def test(settings, crop_size): - file_list = settings.img_list - data_dir = settings.img_path - return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir, crop_size=crop_size) diff --git a/PaddleCV/Research/webvision2018/run.sh b/PaddleCV/Research/webvision2018/run.sh deleted file mode 100644 index 
06b1d77925c4d1118f30115aa1d3e4461a586825..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -export CUDA_VISIBLE_DEVICES=0 -export FLAGS_fraction_of_gpu_memory_to_use=1.0 -python infer.py \ - --model ResNeXt101_32x4d \ - --class_dim 5000 \ - --pretrained ./ckpt/ResNeXt101_32x4d_Release/ \ - --img_list ./data/val_list.txt \ - --img_path ./data/val/ \ - --use_gpu True - diff --git a/PaddleCV/Research/webvision2018/utils/__init__.py b/PaddleCV/Research/webvision2018/utils/__init__.py deleted file mode 100644 index 3900e439e072a35d3df7a6f6e486c0402c0f4e4f..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/utils/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .utility import add_arguments, print_arguments -from .class_accuracy import accuracy diff --git a/PaddleCV/Research/webvision2018/utils/class_accuracy.py b/PaddleCV/Research/webvision2018/utils/class_accuracy.py deleted file mode 100644 index 73ede801c00f0fa952ae017cb0be7f239f8828f6..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/utils/class_accuracy.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import numpy as np - -def accuracy(targets, preds): - """Get the class-level top1 and top5 of model. - - Usage: - - .. code-blcok::python - - top1, top5 = accuracy(targets, preds) - - :params args: evaluate the prediction of model. - :type args: numpy.array - - """ - top1 = np.zeros((5000,), dtype=np.float32) - top5 = np.zeros((5000,), dtype=np.float32) - count = np.zeros((5000,), dtype=np.float32) - - for index in range(targets.shape[0]): - target = targets[index] - if target == preds[index,0]: - top1[target] += 1 - top5[target] += 1 - elif np.sum(target == preds[index,:5]): - top5[target] += 1 - - count[target] += 1 - return (top1/(count+1e-12)).mean(), (top5/(count+1e-12)).mean() diff --git a/PaddleCV/Research/webvision2018/utils/utility.py b/PaddleCV/Research/webvision2018/utils/utility.py deleted file mode 100644 index f622da74db6cceea87f69990d36497a8c17b5ade..0000000000000000000000000000000000000000 --- a/PaddleCV/Research/webvision2018/utils/utility.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import distutils.util -import numpy as np -import six - -def print_arguments(args): - """Print argparse's arguments. 
- - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("------------- Configuration Arguments -------------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%25s : %s" % (arg, value)) - print("----------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) diff --git a/PaddleCV/caffe2fluid/.gitignore b/PaddleCV/caffe2fluid/.gitignore deleted file mode 100644 index 0289f964a81f92cde27b2ef63198ae445cdcffad..0000000000000000000000000000000000000000 --- a/PaddleCV/caffe2fluid/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -proto/caffepb.py -proto/caffe_pb2.py diff --git a/PaddleCV/caffe2fluid/README.md b/PaddleCV/caffe2fluid/README.md deleted file mode 100644 index 78702204ba32ffa63bcab4aef999267a5d7c1078..0000000000000000000000000000000000000000 --- a/PaddleCV/caffe2fluid/README.md +++ /dev/null @@ -1,6 +0,0 @@ - -Hi! - -This directory has been deprecated. - -Please visit the project at [X2Paddle](https://github.com/PaddlePaddle/X2Paddle). 
diff --git a/PaddleCV/deeplabv3+/.gitignore b/PaddleCV/deeplabv3+/.gitignore deleted file mode 100644 index cfe470860395ca802c6d95dc829385cd0d112ad4..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*.tgz -deeplabv3plus_gn_init* -deeplabv3plus_xception65_initialize* -*.log -*.sh -output* diff --git a/PaddleCV/deeplabv3+/.run_ce.sh b/PaddleCV/deeplabv3+/.run_ce.sh deleted file mode 100755 index c4e6055e1d9d3ad9a9b039d5973c100e76a8aadf..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/.run_ce.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -export MKL_NUM_THREADS=1 -export OMP_NUM_THREADS=1 - -DATASET_PATH=${HOME}/.cache/paddle/dataset/cityscape/ - -cudaid=${deeplabv3plus:=0} # use 0-th card as default -export CUDA_VISIBLE_DEVICES=$cudaid - -FLAGS_benchmark=true python train.py \ ---batch_size=2 \ ---train_crop_size=769 \ ---total_step=50 \ ---save_weights_path=output1 \ ---dataset_path=$DATASET_PATH \ ---enable_ce | python _ce.py - -cudaid=${deeplabv3plus_m:=0,1,2,3} # use 0,1,2,3 card as default -export CUDA_VISIBLE_DEVICES=$cudaid - -FLAGS_benchmark=true python train.py \ ---batch_size=8 \ ---train_crop_size=769 \ ---total_step=50 \ ---save_weights_path=output4 \ ---dataset_path=$DATASET_PATH \ ---enable_ce | python _ce.py diff --git a/PaddleCV/deeplabv3+/README.md b/PaddleCV/deeplabv3+/README.md deleted file mode 100644 index 33608c9561b246c90a24ba45a0c3f5d5ecae6ef8..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/README.md +++ /dev/null @@ -1,121 +0,0 @@ -**该项目已被迁移至[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg), 这个项目包含了更多的分割模型。** - -DeepLab运行本目录下的程序示例需要使用PaddlePaddle Fluid v1.3.0版本或以上。如果您的PaddlePaddle安装版本低于此要求,请按照安装文档中的说明更新PaddlePaddle安装版本,如果使用GPU,该程序需要使用cuDNN v7版本。 - - -## 代码结构 -``` -├── models.py # 网络结构定义脚本 -├── train.py # 训练任务脚本 -├── eval.py # 评估脚本 -└── reader.py # 定义通用的函数以及数据预处理脚本 -``` - -## 简介 - -DeepLabv3+ 是DeepLab语义分割系列网络的最新作,其前作有 DeepLabv1,DeepLabv2, 
DeepLabv3, -在最新作中,DeepLab的作者通过encoder-decoder进行多尺度信息的融合,同时保留了原来的空洞卷积和ASSP层, -其骨干网络使用了Xception模型,提高了语义分割的健壮性和运行速率,在 PASCAL VOC 2012 dataset取得新的state-of-art performance,89.0mIOU。 - -![](./imgs/model.png) - -Python Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/124366) - -## 数据准备 - - - -本文采用Cityscape数据集,请前往[Cityscape官网](https://www.cityscapes-dataset.com)注册下载。 -下载以后的数据目录结构如下 -``` -data/cityscape/ -|-- gtFine -| |-- test -| |-- train -| `-- val -|-- leftImg8bit - |-- test - |-- train - `-- val -``` - -# 预训练模型准备 - -我们为了节约更多的显存,在这里我们使用Group Norm作为我们的归一化手段。 -如果需要从头开始训练模型,用户需要下载我们的初始化模型 -``` -wget https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn_init.tgz -tar -xf deeplabv3plus_gn_init.tgz && rm deeplabv3plus_gn_init.tgz -``` -如果需要最终训练模型进行fine tune或者直接用于预测,请下载我们的最终模型 -``` -wget https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn.tgz -tar -xf deeplabv3plus_gn.tgz && rm deeplabv3plus_gn.tgz -``` - - -## 模型训练与预测 - -### 训练 -执行以下命令进行训练,同时指定weights的保存路径,初始化路径,以及数据存放位置: -``` -python ./train.py \ - --batch_size=1 \ - --train_crop_size=769 \ - --total_step=50 \ - --norm_type=gn \ - --init_weights_path=$INIT_WEIGHTS_PATH \ - --save_weights_path=$SAVE_WEIGHTS_PATH \ - --dataset_path=$DATASET_PATH -``` -注意:如果在windows系统上进行训练,预测请不要将use_multiprocessing设置为True - -使用以下命令获得更多使用说明: -``` -python train.py --help -``` -以上命令用于测试训练过程是否正常,仅仅迭代了50次并且使用了1的batch size,如果需要复现 -原论文的实验,请使用以下设置: -``` -CUDA_VISIBLE_DEVICES=0 \ -python ./train.py \ - --batch_size=4 \ - --parallel=True \ - --norm_type=gn \ - --train_crop_size=769 \ - --total_step=500000 \ - --base_lr=0.001 \ - --init_weights_path=deeplabv3plus_gn_init \ - --save_weights_path=output \ - --dataset_path=$DATASET_PATH -``` -如果您的显存不足,可以尝试减小`batch_size`,同时等比例放大`total_step`, 缩小`base_lr`, 保证相乘的值不变,这得益于Group Norm的特性,改变 `batch_size` 并不会显著影响结果,而且能够节约更多显存, 比如您可以设置`--batch_size=2 --total_step=1000000 --base_lr=0.0005`。 - -### 测试 -执行以下命令在`Cityscape`测试数据集上进行测试: -``` -python ./eval.py \ - --init_weights_path=deeplabv3plus_gn \ - 
--norm_type=gn \ - --dataset_path=$DATASET_PATH -``` -需要通过选项`--init_weights_path`指定模型文件。测试脚本的输出的评估指标为mean IoU。 - - -## 实验结果 -训练完成以后,使用`eval.py`在验证集上进行测试,得到以下结果: -``` -load from: ../models/deeplabv3plus_gn -total number 500 -step: 500, mIoU: 0.7881 -``` - -## 其他信息 - -|数据集 | norm type | pretrained model | trained model | mean IoU -|---|---|---|---|---| -|CityScape | group norm | [deeplabv3plus_gn_init.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn_init.tgz) | [deeplabv3plus_gn.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn.tgz) | 0.7881 | - -## 参考 - -- [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) diff --git a/PaddleCV/deeplabv3+/__init__.py b/PaddleCV/deeplabv3+/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/deeplabv3+/_ce.py b/PaddleCV/deeplabv3+/_ce.py deleted file mode 100644 index b0127d6445213b9d3934220fa36e9eb44d3e04b4..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/_ce.py +++ /dev/null @@ -1,60 +0,0 @@ -# this file is only used for continuous evaluation test! - -import os -import sys -sys.path.append(os.environ['ceroot']) -from kpi import CostKpi -from kpi import DurationKpi - -each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.1, 0, actived=True) -train_loss_card1_kpi = CostKpi('train_loss_card1', 0.05, 0) -each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.1, 0, actived=True) -train_loss_card4_kpi = CostKpi('train_loss_card4', 0.05, 0) - -tracking_kpis = [ - each_pass_duration_card1_kpi, - train_loss_card1_kpi, - each_pass_duration_card4_kpi, - train_loss_card4_kpi, - ] - - -def parse_log(log): - ''' - This method should be implemented by model developers. 
- - The suggestion: - - each line in the log should be key, value, for example: - - " - train_cost\t1.0 - test_cost\t1.0 - train_cost\t1.0 - train_cost\t1.0 - train_acc\t1.2 - " - ''' - for line in log.split('\n'): - fs = line.strip().split('\t') - print(fs) - if len(fs) == 3 and fs[0] == 'kpis': - kpi_name = fs[1] - kpi_value = float(fs[2]) - yield kpi_name, kpi_value - - -def log_to_ce(log): - kpi_tracker = {} - for kpi in tracking_kpis: - kpi_tracker[kpi.name] = kpi - - for (kpi_name, kpi_value) in parse_log(log): - print(kpi_name, kpi_value) - kpi_tracker[kpi_name].add_record(kpi_value) - kpi_tracker[kpi_name].persist() - - -if __name__ == '__main__': - log = sys.stdin.read() - log_to_ce(log) diff --git a/PaddleCV/deeplabv3+/data_utils.py b/PaddleCV/deeplabv3+/data_utils.py deleted file mode 100644 index c43e040af8d46f0fc57ee9862da4ee749768e7c6..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/data_utils.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py -""" - -import time -import numpy as np -import threading -import multiprocessing -try: - import queue -except ImportError: - import Queue as queue - - -class GeneratorEnqueuer(object): - """ - Builds a queue out of a data generator. - - Args: - generator: a generator function which endlessly yields data - use_multiprocessing (bool): use multiprocessing if True, - otherwise use threading. - wait_time (float): time to sleep in-between calls to `put()`. - random_seed (int): Initial seed for workers, - will be incremented by one for each workers. 
- """ - - def __init__(self, - generator, - use_multiprocessing=False, - wait_time=0.05, - random_seed=None): - self.wait_time = wait_time - self._generator = generator - self._use_multiprocessing = use_multiprocessing - self._threads = [] - self._stop_event = None - self.queue = None - self._manager = None - self.seed = random_seed - - def start(self, workers=1, max_queue_size=10): - """ - Start worker threads which add data from the generator into the queue. - - Args: - workers (int): number of worker threads - max_queue_size (int): queue size - (when full, threads could block on `put()`) - """ - - def data_generator_task(): - """ - Data generator task. - """ - - def task(): - if (self.queue is not None and - self.queue.qsize() < max_queue_size): - generator_output = next(self._generator) - self.queue.put((generator_output)) - else: - time.sleep(self.wait_time) - - if not self._use_multiprocessing: - while not self._stop_event.is_set(): - with self.genlock: - try: - task() - except Exception: - self._stop_event.set() - break - else: - while not self._stop_event.is_set(): - try: - task() - except Exception: - self._stop_event.set() - break - - try: - if self._use_multiprocessing: - self._manager = multiprocessing.Manager() - self.queue = self._manager.Queue(maxsize=max_queue_size) - self._stop_event = multiprocessing.Event() - else: - self.genlock = threading.Lock() - self.queue = queue.Queue() - self._stop_event = threading.Event() - for _ in range(workers): - if self._use_multiprocessing: - # Reset random seed else all children processes - # share the same seed - np.random.seed(self.seed) - thread = multiprocessing.Process(target=data_generator_task) - thread.daemon = True - if self.seed is not None: - self.seed += 1 - else: - thread = threading.Thread(target=data_generator_task) - self._threads.append(thread) - thread.start() - except: - self.stop() - raise - - def is_running(self): - """ - Returns: - bool: Whether the worker theads are running. 
- """ - return self._stop_event is not None and not self._stop_event.is_set() - - def stop(self, timeout=None): - """ - Stops running threads and wait for them to exit, if necessary. - Should be called by the same thread which called `start()`. - - Args: - timeout(int|None): maximum time to wait on `thread.join()`. - """ - if self.is_running(): - self._stop_event.set() - for thread in self._threads: - if self._use_multiprocessing: - if thread.is_alive(): - thread.join(timeout) - else: - thread.join(timeout) - if self._manager: - self._manager.shutdown() - - self._threads = [] - self._stop_event = None - self.queue = None - - def get(self): - """ - Creates a generator to extract data from the queue. - Skip the data if it is `None`. - - # Yields - tuple of data in the queue. - """ - while self.is_running(): - if not self.queue.empty(): - inputs = self.queue.get() - if inputs is not None: - yield inputs - else: - time.sleep(self.wait_time) diff --git a/PaddleCV/deeplabv3+/eval.py b/PaddleCV/deeplabv3+/eval.py deleted file mode 100644 index 1ac34511c6d19ffafb07b2b78a43ddf3d39ab796..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/eval.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -if 'FLAGS_fraction_of_gpu_memory_to_use' not in os.environ: - os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = '0.98' -os.environ['FLAGS_enable_parallel_graph'] = '1' - -import paddle -import paddle.fluid as fluid -import numpy as np -import argparse -from reader import CityscapeDataset -import reader -import models -import sys -import utility - -parser = argparse.ArgumentParser() -add_arg = lambda *args: utility.add_arguments(*args, argparser=parser) - -# yapf: disable -add_arg('total_step', int, -1, "Number of the step to be evaluated, -1 for full evaluation.") -add_arg('init_weights_path', str, None, "Path of the weights to evaluate.") -add_arg('dataset_path', str, None, "Cityscape dataset path.") -add_arg('use_gpu', bool, True, "Whether use GPU or CPU.") -add_arg('num_classes', int, 19, "Number of classes.") -add_arg('use_py_reader', bool, True, "Use py_reader.") -add_arg('use_multiprocessing', bool, False, "Use multiprocessing.") -add_arg('norm_type', str, 'bn', "Normalization type, should be 'bn' or 'gn'.") -#yapf: enable - - -def mean_iou(pred, label): - label = fluid.layers.elementwise_min( - label, fluid.layers.assign(np.array( - [num_classes], dtype=np.int32))) - label_ignore = (label == num_classes).astype('int32') - label_nignore = (label != num_classes).astype('int32') - - pred = pred * label_nignore + label_ignore * num_classes - - miou, wrong, correct = fluid.layers.mean_iou(pred, label, num_classes + 1) - return miou, wrong, correct - - -def load_model(): - if os.path.isdir(args.init_weights_path): - fluid.io.load_params( - exe, dirname=args.init_weights_path, main_program=tp) - else: - fluid.io.load_params( - exe, dirname="", filename=args.init_weights_path, main_program=tp) - - -CityscapeDataset = reader.CityscapeDataset - -args = parser.parse_args() -utility.check_gpu(args.use_gpu) - -models.clean() -models.is_train = 
False -models.default_norm_type = args.norm_type -deeplabv3p = models.deeplabv3p - -image_shape = [1025, 2049] -eval_shape = [1024, 2048] - -sp = fluid.Program() -tp = fluid.Program() -batch_size = 1 -reader.default_config['crop_size'] = -1 -reader.default_config['shuffle'] = False -num_classes = args.num_classes - -with fluid.program_guard(tp, sp): - if args.use_py_reader: - py_reader = fluid.layers.py_reader(capacity=64, - shapes=[[1, 3, 0, 0], [1] + eval_shape], - dtypes=['float32', 'int32']) - img, label = fluid.layers.read_file(py_reader) - else: - img = fluid.layers.data(name='img', shape=[3, 0, 0], dtype='float32') - label = fluid.layers.data(name='label', shape=eval_shape, dtype='int32') - - img = fluid.layers.resize_bilinear(img, image_shape) - logit = deeplabv3p(img) - logit = fluid.layers.resize_bilinear(logit, eval_shape) - pred = fluid.layers.argmax(logit, axis=1).astype('int32') - miou, out_wrong, out_correct = mean_iou(pred, label) - -tp = tp.clone(True) - -place = fluid.CPUPlace() -if args.use_gpu: - place = fluid.CUDAPlace(0) -exe = fluid.Executor(place) -exe.run(sp) - -if args.init_weights_path: - print("load from:", args.init_weights_path) - load_model() - -dataset = CityscapeDataset(args.dataset_path, 'val') -if args.total_step == -1: - total_step = len(dataset.label_files) -else: - total_step = args.total_step - -batches = dataset.get_batch_generator(batch_size, total_step, use_multiprocessing=args.use_multiprocessing) -if args.use_py_reader: - py_reader.decorate_tensor_provider(lambda :[ (yield b[0],b[1]) for b in batches]) - py_reader.start() - -sum_iou = 0 -all_correct = np.array([0], dtype=np.int64) -all_wrong = np.array([0], dtype=np.int64) - -for i in range(total_step): - if not args.use_py_reader: - _, imgs, labels, names = next(batches) - result = exe.run(tp, - feed={'img': imgs, - 'label': labels}, - fetch_list=[pred, miou, out_wrong, out_correct]) - else: - result = exe.run(tp, - fetch_list=[pred, miou, out_wrong, out_correct]) - - 
wrong = result[2][:-1] + all_wrong - right = result[3][:-1] + all_correct - all_wrong = wrong.copy() - all_correct = right.copy() - mp = (wrong + right) != 0 - miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp]))) - print('step: %s, mIoU: %s' % (i + 1, miou2)) - -print('eval done!') diff --git a/PaddleCV/deeplabv3+/imgs/model.png b/PaddleCV/deeplabv3+/imgs/model.png deleted file mode 100644 index c0f12db6474e28f68ea45aa498026ef5261bcbe9..0000000000000000000000000000000000000000 Binary files a/PaddleCV/deeplabv3+/imgs/model.png and /dev/null differ diff --git a/PaddleCV/deeplabv3+/models.py b/PaddleCV/deeplabv3+/models.py deleted file mode 100644 index 2be61e300c0da1dd1d855653297a02fb2f922c75..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/models.py +++ /dev/null @@ -1,345 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid - -import contextlib -import os -name_scope = "" - -decode_channel = 48 -encode_channel = 256 -label_number = 19 - -bn_momentum = 0.99 -dropout_keep_prop = 0.9 -is_train = True - -op_results = {} - -default_epsilon = 1e-3 -default_norm_type = 'bn' -default_group_number = 32 -depthwise_use_cudnn = False - -bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) -depthwise_regularizer = fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0) - - -@contextlib.contextmanager -def scope(name): - global name_scope - bk = name_scope - name_scope = name_scope + name + os.sep - yield - name_scope = bk - - -def check(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def clean(): - global op_results - op_results = {} - - -def append_op_result(result, name): - global op_results - op_index = len(op_results) - name = name_scope + name + str(op_index) - op_results[name] = result - return result - - -def conv(*args, **kargs): - if "xception" in name_scope: - init_std = 0.09 - elif "logit" in name_scope: - init_std = 0.01 - elif name_scope.endswith('depthwise' + os.sep): - init_std = 0.33 - else: - init_std = 0.06 - if name_scope.endswith('depthwise' + os.sep): - regularizer = depthwise_regularizer - else: - regularizer = None - - kargs['param_attr'] = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=regularizer, - initializer=fluid.initializer.TruncatedNormal( - loc=0.0, scale=init_std)) - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = fluid.ParamAttr( - name=name_scope + 'biases', - regularizer=regularizer, - initializer=fluid.initializer.ConstantInitializer(value=0.0)) - else: - kargs['bias_attr'] = False - kargs['name'] = name_scope + 'conv' - return append_op_result(fluid.layers.conv2d(*args, 
**kargs), 'conv') - - -def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): - N, C, H, W = input.shape - if C % G != 0: - # print "group can not divide channle:", C, G - for d in range(10): - for t in [d, -d]: - if G + t <= 0: continue - if C % (G + t) == 0: - G = G + t - break - if C % G == 0: - # print "use group size:", G - break - assert C % G == 0 - x = fluid.layers.group_norm( - input, - groups=G, - param_attr=param_attr, - bias_attr=bias_attr, - name=name_scope + 'group_norm') - return x - - -def bn(*args, **kargs): - if default_norm_type == 'bn': - with scope('BatchNorm'): - return append_op_result( - fluid.layers.batch_norm( - *args, - epsilon=default_epsilon, - momentum=bn_momentum, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer), - moving_mean_name=name_scope + 'moving_mean', - moving_variance_name=name_scope + 'moving_variance', - **kargs), - 'bn') - elif default_norm_type == 'gn': - with scope('GroupNorm'): - return append_op_result( - group_norm( - args[0], - default_group_number, - eps=default_epsilon, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer)), - 'gn') - else: - raise "Unsupport norm type:" + default_norm_type - - -def bn_relu(data): - return append_op_result(fluid.layers.relu(bn(data)), 'relu') - - -def relu(data): - return append_op_result( - fluid.layers.relu( - data, name=name_scope + 'relu'), 'relu') - - -def seperate_conv(input, channel, stride, filter, dilation=1, act=None): - with scope('depthwise'): - input = conv( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - dilation=dilation, - use_cudnn=depthwise_use_cudnn) - input = bn(input) - if act: input = act(input) - with scope('pointwise'): - input = conv(input, 
channel, 1, 1, groups=1, padding=0) - input = bn(input) - if act: input = act(input) - return input - - -def xception_block(input, - channels, - strides=1, - filters=3, - dilation=1, - skip_conv=True, - has_skip=True, - activation_fn_in_separable_conv=False): - repeat_number = 3 - channels = check(channels, repeat_number) - filters = check(filters, repeat_number) - strides = check(strides, repeat_number) - data = input - results = [] - for i in range(repeat_number): - with scope('separable_conv' + str(i + 1)): - if not activation_fn_in_separable_conv: - data = relu(data) - data = seperate_conv( - data, - channels[i], - strides[i], - filters[i], - dilation=dilation) - else: - data = seperate_conv( - data, - channels[i], - strides[i], - filters[i], - dilation=dilation, - act=relu) - results.append(data) - if not has_skip: - return append_op_result(data, 'xception_block'), results - if skip_conv: - with scope('shortcut'): - skip = bn( - conv( - input, channels[-1], 1, strides[-1], groups=1, padding=0)) - else: - skip = input - return append_op_result(data + skip, 'xception_block'), results - - -def entry_flow(data): - with scope("entry_flow"): - with scope("conv1"): - data = conv(data, 32, 3, stride=2, padding=1) - data = bn_relu(data) - with scope("conv2"): - data = conv(data, 64, 3, stride=1, padding=1) - data = bn_relu(data) - with scope("block1"): - data, _ = xception_block(data, 128, [1, 1, 2]) - with scope("block2"): - data, results = xception_block(data, 256, [1, 1, 2]) - with scope("block3"): - data, _ = xception_block(data, 728, [1, 1, 2]) - return data, results[1] - - -def middle_flow(data): - with scope("middle_flow"): - for i in range(16): - with scope("block" + str(i + 1)): - data, _ = xception_block(data, 728, [1, 1, 1], skip_conv=False) - return data - - -def exit_flow(data): - with scope("exit_flow"): - with scope('block1'): - data, _ = xception_block(data, [728, 1024, 1024], [1, 1, 1]) - with scope('block2'): - data, _ = xception_block( - data, [1536, 
1536, 2048], [1, 1, 1], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True) - return data - - -def dropout(x, keep_rate): - if is_train: - return fluid.layers.dropout(x, 1 - keep_rate) / keep_rate - else: - return x - - -def encoder(input): - with scope('encoder'): - channel = 256 - with scope("image_pool"): - image_avg = fluid.layers.reduce_mean(input, [2, 3], keep_dim=True) - append_op_result(image_avg, 'reduce_mean') - image_avg = bn_relu( - conv( - image_avg, channel, 1, 1, groups=1, padding=0)) - image_avg = fluid.layers.resize_bilinear(image_avg, input.shape[2:]) - - with scope("aspp0"): - aspp0 = bn_relu(conv(input, channel, 1, 1, groups=1, padding=0)) - with scope("aspp1"): - aspp1 = seperate_conv(input, channel, 1, 3, dilation=6, act=relu) - with scope("aspp2"): - aspp2 = seperate_conv(input, channel, 1, 3, dilation=12, act=relu) - with scope("aspp3"): - aspp3 = seperate_conv(input, channel, 1, 3, dilation=18, act=relu) - with scope("concat"): - data = append_op_result( - fluid.layers.concat( - [image_avg, aspp0, aspp1, aspp2, aspp3], axis=1), - 'concat') - data = bn_relu(conv(data, channel, 1, 1, groups=1, padding=0)) - data = dropout(data, dropout_keep_prop) - return data - - -def decoder(encode_data, decode_shortcut): - with scope('decoder'): - with scope('concat'): - decode_shortcut = bn_relu( - conv( - decode_shortcut, decode_channel, 1, 1, groups=1, padding=0)) - encode_data = fluid.layers.resize_bilinear( - encode_data, decode_shortcut.shape[2:]) - encode_data = fluid.layers.concat( - [encode_data, decode_shortcut], axis=1) - append_op_result(encode_data, 'concat') - with scope("separable_conv1"): - encode_data = seperate_conv( - encode_data, encode_channel, 1, 3, dilation=1, act=relu) - with scope("separable_conv2"): - encode_data = seperate_conv( - encode_data, encode_channel, 1, 3, dilation=1, act=relu) - return encode_data - - -def deeplabv3p(img): - global default_epsilon - append_op_result(img, 'img') - with 
scope('xception_65'): - default_epsilon = 1e-3 - # Entry flow - data, decode_shortcut = entry_flow(img) - # Middle flow - data = middle_flow(data) - # Exit flow - data = exit_flow(data) - default_epsilon = 1e-5 - encode_data = encoder(data) - encode_data = decoder(encode_data, decode_shortcut) - with scope('logit'): - logit = conv( - encode_data, label_number, 1, stride=1, padding=0, bias_attr=True) - logit = fluid.layers.resize_bilinear(logit, img.shape[2:]) - return logit diff --git a/PaddleCV/deeplabv3+/reader.py b/PaddleCV/deeplabv3+/reader.py deleted file mode 100644 index 2ebc7e9084ad481de58bfdc790a56006b7e9c62e..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/reader.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import cv2 -import numpy as np -import os -import six -import time -from data_utils import GeneratorEnqueuer - -default_config = { - "shuffle": True, - "min_resize": 0.5, - "max_resize": 4, - "crop_size": 769, -} - -# used for ce -if 'ce_mode' in os.environ: - np.random.seed(0) - - -def slice_with_pad(a, s, value=0): - pads = [] - slices = [] - for i in range(len(a.shape)): - if i >= len(s): - pads.append([0, 0]) - slices.append([0, a.shape[i]]) - else: - l, r = s[i] - if l < 0: - pl = -l - l = 0 - else: - pl = 0 - if r > a.shape[i]: - pr = r - a.shape[i] - r = a.shape[i] - else: - pr = 0 - pads.append([pl, pr]) - slices.append([l, r]) - slices = list(map(lambda x: slice(x[0], x[1], 1), slices)) - a = a[tuple(slices)] - a = np.pad(a, pad_width=pads, mode='constant', constant_values=value) - return a - - -class CityscapeDataset: - def __init__(self, dataset_dir, subset='train', config=default_config): - with open(os.path.join(dataset_dir, subset + '.list'), 'r') as fr: - file_list = fr.readlines() - all_images = [] - all_labels = [] - for i in range(len(file_list)): - img_gt = file_list[i].strip().split(' ') - all_images.append(os.path.join(dataset_dir, img_gt[0])) - all_labels.append(os.path.join(dataset_dir, img_gt[1])) - - self.label_files = all_labels - self.img_files = all_images - self.index = 0 - self.subset = subset - self.dataset_dir = dataset_dir - self.config = config - self.reset() - - def reset(self, shuffle=False): - self.index = 0 - if self.config["shuffle"]: - np.random.shuffle(self.label_files) - - def next_img(self): - self.index += 1 - if self.index >= len(self.label_files): - self.reset() - - def get_img(self): - shape = self.config["crop_size"] - while True: - ln = self.label_files[self.index] - img_name = self.img_files[self.index] - label = cv2.imread(ln) - img = cv2.imread(img_name) - if img is None: - print("load img failed:", 
img_name) - self.next_img() - else: - break - if shape == -1: - return img, label, ln - - if np.random.rand() > 0.5: - range_l = 1 - range_r = self.config['max_resize'] - else: - range_l = self.config['min_resize'] - range_r = 1 - - if np.random.rand() > 0.5: - assert len(img.shape) == 3 and len( - label.shape) == 3, "{} {}".format(img.shape, label.shape) - img = img[:, :, ::-1] - label = label[:, :, ::-1] - - random_scale = np.random.rand(1) * (range_r - range_l) + range_l - crop_size = int(shape / random_scale) - bb = crop_size // 2 - - def _randint(low, high): - return int(np.random.rand(1) * (high - low) + low) - - offset_x = np.random.randint(bb, max(bb + 1, img.shape[0] - - bb)) - crop_size // 2 - offset_y = np.random.randint(bb, max(bb + 1, img.shape[1] - - bb)) - crop_size // 2 - img_crop = slice_with_pad(img, [[offset_x, offset_x + crop_size], - [offset_y, offset_y + crop_size]], 128) - img = cv2.resize(img_crop, (shape, shape)) - label_crop = slice_with_pad(label, [[offset_x, offset_x + crop_size], - [offset_y, offset_y + crop_size]], - 255) - label = cv2.resize( - label_crop, (shape, shape), interpolation=cv2.INTER_NEAREST) - return img, label, ln + str( - (offset_x, offset_y, crop_size, random_scale)) - - def get_batch(self, batch_size=1): - imgs = [] - labels = [] - names = [] - while len(imgs) < batch_size: - img, label, ln = self.get_img() - imgs.append(img) - labels.append(label) - names.append(ln) - self.next_img() - return np.array(imgs), np.array(labels), names - - def get_batch_generator(self, - batch_size, - total_step, - num_workers=8, - max_queue=32, - use_multiprocessing=True): - def do_get_batch(): - iter_id = 0 - while True: - imgs, labels, names = self.get_batch(batch_size) - labels = labels.astype(np.int32)[:, :, :, 0] - imgs = imgs[:, :, :, ::-1].transpose( - 0, 3, 1, 2).astype(np.float32) / (255.0 / 2) - 1 - yield imgs, labels, names - if not use_multiprocessing: - iter_id += 1 - if iter_id >= total_step: - break - - batches = 
do_get_batch() - if not use_multiprocessing: - try: - from prefetch_generator import BackgroundGenerator - batches = BackgroundGenerator(batches, 100) - except: - print( - "You can install 'prefetch_generator' for acceleration of data reading." - ) - return batches - - def reader(): - try: - enqueuer = GeneratorEnqueuer( - batches, use_multiprocessing=use_multiprocessing) - enqueuer.start(max_queue_size=max_queue, workers=num_workers) - generator_out = None - for i in range(total_step): - while enqueuer.is_running(): - if not enqueuer.queue.empty(): - generator_out = enqueuer.queue.get() - break - else: - time.sleep(0.02) - yield generator_out - generator_out = None - enqueuer.stop() - finally: - if enqueuer is not None: - enqueuer.stop() - - data_gen = reader() - return data_gen diff --git a/PaddleCV/deeplabv3+/train.py b/PaddleCV/deeplabv3+/train.py deleted file mode 100755 index 06860048d2d543836c1fd3d1941e4207eb4e4dde..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/train.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os - - -def set_paddle_flags(flags): - for key, value in flags.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. -set_paddle_flags({ - 'FLAGS_eager_delete_tensor_gb': 0, # enable GC - # You can omit the following settings, because the default - # value of FLAGS_memory_fraction_of_eager_deletion is 1, - # and default value of FLAGS_fast_eager_deletion_mode is 1 - 'FLAGS_memory_fraction_of_eager_deletion': 1, - 'FLAGS_fast_eager_deletion_mode': 1, - # Setting the default used gpu memory - 'FLAGS_fraction_of_gpu_memory_to_use': 0.98 -}) - -import paddle -import paddle.fluid as fluid -from paddle.fluid import profiler -import numpy as np -import argparse -from reader import CityscapeDataset -import reader -import models -import time -import contextlib -import paddle.fluid.profiler as profiler -import utility - -parser = argparse.ArgumentParser() -add_arg = lambda *args: utility.add_arguments(*args, argparser=parser) - -# yapf: disable -add_arg('batch_size', int, 4, "The number of images in each batch during training.") -add_arg('train_crop_size', int, 769, "Image crop size during training.") -add_arg('base_lr', float, 0.001, "The base learning rate for model training.") -add_arg('total_step', int, 500000, "Number of the training step.") -add_arg('init_weights_path', str, None, "Path of the initial weights in paddlepaddle format.") -add_arg('save_weights_path', str, None, "Path of the saved weights during training.") -add_arg('dataset_path', str, None, "Cityscape dataset path.") -add_arg('parallel', bool, True, "using ParallelExecutor.") -add_arg('use_gpu', bool, True, "Whether use GPU or CPU.") -add_arg('num_classes', int, 19, "Number of classes.") -add_arg('load_logit_layer', bool, True, "Load 
last logit fc layer or not. If you are training with different number of classes, you should set to False.") -add_arg('memory_optimize', bool, True, "Using memory optimizer.") -add_arg('norm_type', str, 'bn', "Normalization type, should be 'bn' or 'gn'.") -add_arg('profile', bool, False, "Enable profiler.") -add_arg('use_py_reader', bool, True, "Use py reader.") -add_arg('use_multiprocessing', bool, False, "Use multiprocessing.") -add_arg("num_workers", int, 8, "The number of python processes used to read and preprocess data.") -# NOTE: args for profiler, used for benchmark -add_arg("profiler_path", str, '/tmp/profile_file2', "the profiler output file path. (used for benchmark)") -parser.add_argument( - '--enable_ce', - action='store_true', - help='If set, run the task with continuous evaluation logs. Users can ignore this agument.') -#yapf: enable - -@contextlib.contextmanager -def profile_context(profile=True): - if profile: - with profiler.profiler('All', 'total', args.profiler_path): - yield - else: - yield - -def load_model(): - if os.path.isdir(args.init_weights_path): - load_vars = [ - x for x in tp.list_vars() - if isinstance(x, fluid.framework.Parameter) and x.name.find('logit') == - -1 - ] - if args.load_logit_layer: - fluid.io.load_params( - exe, dirname=args.init_weights_path, main_program=tp) - else: - fluid.io.load_vars(exe, dirname=args.init_weights_path, vars=load_vars) - else: - fluid.io.load_params( - exe, - dirname="", - filename=args.init_weights_path, - main_program=tp) - - - -def save_model(): - assert not os.path.isfile(args.save_weights_path) - fluid.io.save_params( - exe, dirname=args.save_weights_path, main_program=tp) - - -def loss(logit, label): - label_nignore = fluid.layers.less_than( - label.astype('float32'), - fluid.layers.assign(np.array([num_classes], 'float32')), - force_cpu=False).astype('float32') - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.reshape(logit, [-1, num_classes]) - label = 
fluid.layers.reshape(label, [-1, 1]) - label = fluid.layers.cast(label, 'int64') - label_nignore = fluid.layers.reshape(label_nignore, [-1, 1]) - logit = fluid.layers.softmax(logit, use_cudnn=False) - loss = fluid.layers.cross_entropy(logit, label, ignore_index=255) - label_nignore.stop_gradient = True - label.stop_gradient = True - return loss, label_nignore - - -args = parser.parse_args() -utility.print_arguments(args) -utility.check_gpu(args.use_gpu) - -models.clean() -models.bn_momentum = 0.9997 -models.dropout_keep_prop = 0.9 -models.label_number = args.num_classes -models.default_norm_type = args.norm_type -deeplabv3p = models.deeplabv3p - -sp = fluid.Program() -tp = fluid.Program() - -crop_size = args.train_crop_size -batch_size = args.batch_size -image_shape = [crop_size, crop_size] -reader.default_config['crop_size'] = crop_size -reader.default_config['shuffle'] = True -num_classes = args.num_classes -weight_decay = 0.00004 - -base_lr = args.base_lr -total_step = args.total_step - -# only for ce -if args.enable_ce: - SEED = 102 - sp.random_seed = SEED - tp.random_seed = SEED - reader.default_config['shuffle'] = False - -with fluid.program_guard(tp, sp): - if args.use_py_reader: - batch_size_each = batch_size // utility.get_device_count() - py_reader = fluid.layers.py_reader(capacity=64, - shapes=[[batch_size_each, 3] + image_shape, [batch_size_each] + image_shape], - dtypes=['float32', 'int32']) - img, label = fluid.layers.read_file(py_reader) - else: - img = fluid.layers.data( - name='img', shape=[3] + image_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=image_shape, dtype='int32') - logit = deeplabv3p(img) - pred = fluid.layers.argmax(logit, axis=1).astype('int32') - loss, mask = loss(logit, label) - lr = fluid.layers.polynomial_decay( - base_lr, total_step, end_learning_rate=0, power=0.9) - area = fluid.layers.elementwise_max( - fluid.layers.reduce_mean(mask), - fluid.layers.assign(np.array( - [0.1], dtype=np.float32))) - 
loss_mean = fluid.layers.reduce_mean(loss) / area - loss_mean.persistable = True - - opt = fluid.optimizer.Momentum( - lr, - momentum=0.9, - regularization=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=weight_decay)) - optimize_ops, params_grads = opt.minimize(loss_mean, startup_program=sp) - # ir memory optimizer has some issues, we need to seed grad persistable to - # avoid this issue - for p,g in params_grads: g.persistable = True - - -exec_strategy = fluid.ExecutionStrategy() -exec_strategy.num_threads = utility.get_device_count() -exec_strategy.num_iteration_per_drop_scope = 100 -build_strategy = fluid.BuildStrategy() -if args.memory_optimize: - build_strategy.fuse_relu_depthwise_conv = True - build_strategy.enable_inplace = True - -place = fluid.CPUPlace() -if args.use_gpu: - place = fluid.CUDAPlace(0) -exe = fluid.Executor(place) -exe.run(sp) - -if args.init_weights_path: - print("load from:", args.init_weights_path) - load_model() - -dataset = reader.CityscapeDataset(args.dataset_path, 'train') - -if args.parallel: - binary = fluid.compiler.CompiledProgram(tp).with_data_parallel( - loss_name=loss_mean.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) -else: - binary = fluid.compiler.CompiledProgram(tp) - -if args.use_py_reader: - assert(batch_size % utility.get_device_count() == 0) - def data_gen(): - batches = dataset.get_batch_generator( - batch_size // utility.get_device_count(), - total_step * utility.get_device_count(), - use_multiprocessing=args.use_multiprocessing, num_workers=args.num_workers) - for b in batches: - yield b[0], b[1] - py_reader.decorate_tensor_provider(data_gen) - py_reader.start() -else: - batches = dataset.get_batch_generator(batch_size, total_step, use_multiprocessing=True, num_workers=args.num_workers) -total_time = 0.0 -epoch_idx = 0 -train_loss = 0 - -with profile_context(args.profile): - for i in range(total_step): - epoch_idx += 1 - begin_time = time.time() - if not args.use_py_reader: - 
imgs, labels, names = next(batches) - train_loss, = exe.run(binary, - feed={'img': imgs, - 'label': labels}, fetch_list=[loss_mean]) - else: - train_loss, = exe.run(binary, fetch_list=[loss_mean]) - train_loss = np.mean(train_loss) - end_time = time.time() - total_time += end_time - begin_time - - if i % 100 == 0: - print("Model is saved to", args.save_weights_path) - save_model() - print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f} s".format( - i, train_loss, end_time - begin_time)) - -print("Training done. Model is saved to", args.save_weights_path) -save_model() - -if args.enable_ce: - gpu_num = utility.get_device_count() - print("kpis\teach_pass_duration_card%s\t%s" % - (gpu_num, total_time / epoch_idx)) - print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss)) diff --git a/PaddleCV/deeplabv3+/utility.py b/PaddleCV/deeplabv3+/utility.py deleted file mode 100644 index ce1bd1e683870560684212b89375b6f0f893c4b4..0000000000000000000000000000000000000000 --- a/PaddleCV/deeplabv3+/utility.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import distutils.util -import six -import paddle.fluid as fluid - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. - """ - err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set use_gpu as false in config file to run " \ - "model on CPU" - - try: - if use_gpu and not fluid.is_compiled_with_cuda(): - logger.error(err) - sys.exit(1) - except Exception as e: - pass - - -def get_device_count(): - try: - device_num = max(fluid.core.get_cuda_device_count(), 1) - except: - device_num = 1 - - return device_num diff --git a/PaddleCV/icnet/.run_ce.sh b/PaddleCV/icnet/.run_ce.sh deleted file mode 100755 index 643c1ed4cd1bd1012935e063cd8b3e3bbfd4f6d0..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/.run_ce.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -# This file is only used for continuous evaluation. 
- -export ce_mode=1 -rm -rf *_factor.txt -python train.py --use_gpu=True --random_mirror=False --random_scaling=False 1> log -cat log | python _ce.py diff --git a/PaddleCV/icnet/README.md b/PaddleCV/icnet/README.md deleted file mode 100644 index aa10c40f71e3adaa11859c424fc49acbec5b1b30..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/README.md +++ /dev/null @@ -1,110 +0,0 @@ -**该项目已被迁移至[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg), 这个项目包含了更多的分割模型。** - -## 代码结构 -``` -├── network.py # 网络结构定义脚本 -├── train.py # 训练任务脚本 -├── eval.py # 评估脚本 -├── infer.py # 预测脚本 -├── cityscape.py # 数据预处理脚本 -└── utils.py # 定义通用的函数 -``` - -## 简介 - -Image Cascade Network(ICNet)主要用于图像实时语义分割。相较于其它压缩计算的方法,ICNet即考虑了速度,也考虑了准确性。 -ICNet的主要思想是将输入图像变换为不同的分辨率,然后用不同计算复杂度的子网络计算不同分辨率的输入,然后将结果合并。ICNet由三个子网络组成,计算复杂度高的网络处理低分辨率输入,计算复杂度低的网络处理分辨率高的网络,通过这种方式在高分辨率图像的准确性和低复杂度网络的效率之间获得平衡。 - -整个网络结构如下: - -

-
-图 1 -

- -同时推荐用户参考[ IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/124368) - -## 数据准备 - - - -本文采用Cityscape数据集,请前往[Cityscape官网](https://www.cityscapes-dataset.com)注册下载。下载数据之后,按照[这里](https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/createTrainIdLabelImgs.py#L3)的说明和工具处理数据。 -处理之后的数据 -``` -data/cityscape/ -|-- gtFine -| |-- test -| |-- train -| `-- val -|-- leftImg8bit -| |-- test -| |-- train -| `-- val -|-- train.list -`-- val.list -``` -其中,train.list和val.list分别是用于训练和测试的列表文件,第一列为输入图像数据,第二列为标注数据,两列用空格分开。示例如下: -``` -leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png -leftImg8bit/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png gtFine/train/stuttgart/stuttgart_000072_000019_gtFine_labelTrainIds.png -``` -完成数据下载和准备后,需要修改`cityscape.py`脚本中对应的数据地址。 - -## 模型训练与预测 - -### 训练 -执行以下命令进行训练,同时指定checkpoint保存路径: -``` -python train.py --batch_size=16 --use_gpu=True --checkpoint_path="./chkpnt/" -``` -使用以下命令获得更多使用说明: -``` -python train.py --help -``` -训练过程中会根据用户的设置,输出训练集上每个网络分支的`loss`, 示例如下: -``` -Iter[0]; train loss: 2.338; sub4_loss: 3.367; sub24_loss: 4.120; sub124_loss: 0.151 -``` -### 测试 -执行以下命令在`Cityscape`测试数据集上进行测试: -``` -python eval.py --model_path="./chkpnt/100" --use_gpu=True -``` -需要通过选项`--model_path`指定模型文件。 -测试脚本的输出的评估指标为[mean IoU]()。 - -### 预测 -执行以下命令对指定的数据进行预测: -``` -python infer.py \ ---model_path="./chkpnt/100" \ ---images_path="./data/cityscape/" \ ---images_list="./data/cityscape/infer.list" -``` -通过选项`--images_list`指定列表文件,列表文件中每一行为一个要预测的图片的路径。 -预测结果默认保存到当前路径下的`output`文件夹下。 - -## 实验结果 -图2为在`CityScape`训练集上的训练的Loss曲线: - -

-
-图 2 -

- -在训练集上训练,在validation数据集上验证的结果为:mean_IoU=67.0%(论文67.7%) - -图3是使用`infer.py`脚本预测产生的结果示例,其中,第一行为输入的原始图片,第二行为人工的标注,第三行为我们模型计算的结果。 -

-
-图 3 -

- -## 其他信息 -|数据集 | pretrained model | -|---|---| -|CityScape | [pretrained_model](https://paddle-icnet-models.bj.bcebos.com/model_1000.tar.gz) | - -## 参考 - -- [ICNet for Real-Time Semantic Segmentation on High-Resolution Images](https://arxiv.org/abs/1704.08545) diff --git a/PaddleCV/icnet/_ce.py b/PaddleCV/icnet/_ce.py deleted file mode 100644 index 87c013b40e68107b8b75ec214d1191507b7847af..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/_ce.py +++ /dev/null @@ -1,57 +0,0 @@ -# this file is only used for continuous evaluation test! - -import os -import sys -sys.path.append(os.environ['ceroot']) -from kpi import CostKpi, DurationKpi, AccKpi - -# NOTE kpi.py should shared in models in some way!!!! - -train_cost_kpi = CostKpi('train_cost', 0.05, 0, actived=True) -train_duration_kpi = DurationKpi('train_duration', 0.02, 0, actived=True) - -tracking_kpis = [ - train_cost_kpi, - train_duration_kpi, -] - - -def parse_log(log): - ''' - This method should be implemented by model developers. 
- - The suggestion: - - each line in the log should be key, value, for example: - - " - train_cost\t1.0 - test_cost\t1.0 - train_cost\t1.0 - train_cost\t1.0 - train_acc\t1.2 - " - ''' - for line in log.split('\n'): - fs = line.strip().split('\t') - print(fs) - if len(fs) == 3 and fs[0] == 'kpis': - kpi_name = fs[1] - kpi_value = float(fs[2]) - yield kpi_name, kpi_value - - -def log_to_ce(log): - kpi_tracker = {} - for kpi in tracking_kpis: - kpi_tracker[kpi.name] = kpi - - for (kpi_name, kpi_value) in parse_log(log): - print(kpi_name, kpi_value) - kpi_tracker[kpi_name].add_record(kpi_value) - kpi_tracker[kpi_name].persist() - - -if __name__ == '__main__': - log = sys.stdin.read() - log_to_ce(log) diff --git a/PaddleCV/icnet/cityscape.py b/PaddleCV/icnet/cityscape.py deleted file mode 100644 index 37b9538d235c2e4e307cbf22e06b00a1ba02aeed..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/cityscape.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Reader for Cityscape dataset. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import cv2 -import numpy as np -import paddle.dataset as dataset - -DATA_PATH = "./data/cityscape" -TRAIN_LIST = DATA_PATH + "/train.list" -TEST_LIST = DATA_PATH + "/val.list" -IGNORE_LABEL = 255 -NUM_CLASSES = 19 -TRAIN_DATA_SHAPE = (3, 720, 720) -TEST_DATA_SHAPE = (3, 1024, 2048) -IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) - - -def train_data_shape(): - return TRAIN_DATA_SHAPE - - -def test_data_shape(): - return TEST_DATA_SHAPE - - -def num_classes(): - return NUM_CLASSES - - -class DataGenerater: - def __init__(self, data_list, mode="train", flip=True, scaling=True): - self.flip = flip - self.scaling = scaling - self.image_label = [] - with open(data_list, 'r') as f: - for line in f: - image_file, label_file = line.strip().split(' ') - self.image_label.append((image_file, label_file)) - - def create_train_reader(self, batch_size): - """ - Create a reader for train dataset. - """ - - def reader(): - np.random.shuffle(self.image_label) - images = [] - labels_sub1 = [] - labels_sub2 = [] - labels_sub4 = [] - count = 0 - for image, label in self.image_label: - image, label_sub1, label_sub2, label_sub4 = self.process_train_data( - image, label) - count += 1 - images.append(image) - labels_sub1.append(label_sub1) - labels_sub2.append(label_sub2) - labels_sub4.append(label_sub4) - if count == batch_size: - yield self.mask( - np.array(images), - np.array(labels_sub1), - np.array(labels_sub2), np.array(labels_sub4)) - images = [] - labels_sub1 = [] - labels_sub2 = [] - labels_sub4 = [] - count = 0 - if images: - yield self.mask( - np.array(images), - np.array(labels_sub1), - np.array(labels_sub2), np.array(labels_sub4)) - - return reader - - def create_test_reader(self): - """ - Create a reader for test dataset. 
- """ - - def reader(): - for image, label in self.image_label: - image, label = self.load(image, label) - image = dataset.image.to_chw(image)[np.newaxis, :] - label = label[np.newaxis, :, :, np.newaxis].astype("float32") - label_mask = np.where((label != IGNORE_LABEL).flatten())[ - 0].astype("int32") - yield image, label, label_mask - - return reader - - def process_train_data(self, image, label): - """ - Process training data. - """ - image, label = self.load(image, label) - if self.flip: - image, label = self.random_flip(image, label) - if self.scaling: - image, label = self.random_scaling(image, label) - image, label = self.resize(image, label, out_size=TRAIN_DATA_SHAPE[1:]) - label = label.astype("float32") - label_sub1 = dataset.image.to_chw(self.scale_label(label, factor=4)) - label_sub2 = dataset.image.to_chw(self.scale_label(label, factor=8)) - label_sub4 = dataset.image.to_chw(self.scale_label(label, factor=16)) - image = dataset.image.to_chw(image) - return image, label_sub1, label_sub2, label_sub4 - - def load(self, image, label): - """ - Load image from file. - """ - image = dataset.image.load_image( - DATA_PATH + "/" + image, is_color=True).astype("float32") - image -= IMG_MEAN - label = dataset.image.load_image( - DATA_PATH + "/" + label, is_color=False).astype("float32") - return image, label - - def random_flip(self, image, label): - """ - Flip image and label randomly. - """ - r = np.random.rand(1) - if r > 0.5: - image = dataset.image.left_right_flip(image, is_color=True) - label = dataset.image.left_right_flip(label, is_color=False) - return image, label - - def random_scaling(self, image, label): - """ - Scale image and label randomly. 
- """ - scale = np.random.uniform(0.5, 2.0, 1)[0] - h_new = int(image.shape[0] * scale) - w_new = int(image.shape[1] * scale) - image = cv2.resize(image, (w_new, h_new)) - label = cv2.resize( - label, (w_new, h_new), interpolation=cv2.INTER_NEAREST) - return image, label - - def padding_as(self, image, h, w, is_color): - """ - Padding image. - """ - pad_h = max(image.shape[0], h) - image.shape[0] - pad_w = max(image.shape[1], w) - image.shape[1] - if is_color: - return np.pad(image, ((0, pad_h), (0, pad_w), (0, 0)), 'constant') - else: - return np.pad(image, ((0, pad_h), (0, pad_w)), 'constant') - - def random_crop(self, im, out_shape, is_color=True): - h, w = im.shape[:2] - h_start = np.random.randint(0, h - out_shape[0] + 1) - w_start = np.random.randint(0, w - out_shape[1] + 1) - h_end, w_end = h_start + out_shape[0], w_start + out_shape[1] - if is_color: - im = im[h_start:h_end, w_start:w_end, :] - else: - im = im[h_start:h_end, w_start:w_end] - return im - - def resize(self, image, label, out_size): - """ - Resize image and label by padding or cropping. - """ - ignore_label = IGNORE_LABEL - label = label - ignore_label - if len(label.shape) == 2: - label = label[:, :, np.newaxis] - combined = np.concatenate((image, label), axis=2) - combined = self.padding_as( - combined, out_size[0], out_size[1], is_color=True) - combined = self.random_crop(combined, out_size, is_color=True) - image = combined[:, :, 0:3] - label = combined[:, :, 3:4] + ignore_label - return image, label - - def scale_label(self, label, factor): - """ - Scale label according to factor. - """ - h = label.shape[0] // factor - w = label.shape[1] // factor - return cv2.resize( - label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis] - - def mask(self, image, label0, label1, label2): - """ - Get mask for valid pixels. 
- """ - mask_sub1 = np.where(((label0 < (NUM_CLASSES + 1)) & ( - label0 != IGNORE_LABEL)).flatten())[0].astype("int32") - mask_sub2 = np.where(((label1 < (NUM_CLASSES + 1)) & ( - label1 != IGNORE_LABEL)).flatten())[0].astype("int32") - mask_sub4 = np.where(((label2 < (NUM_CLASSES + 1)) & ( - label2 != IGNORE_LABEL)).flatten())[0].astype("int32") - return image.astype( - "float32"), label0, mask_sub1, label1, mask_sub2, label2, mask_sub4 - - -def train(batch_size=32, flip=True, scaling=True): - """ - Cityscape training set reader. - It returns a reader, in which each result is a batch with batch_size samples. - - :param batch_size: The batch size of each result return by the reader. - :type batch_size: int - :param flip: Whether flip images randomly. - :type batch_size: bool - :param scaling: Whether scale images randomly. - :type batch_size: bool - :return: Training reader. - :rtype: callable - """ - reader = DataGenerater( - TRAIN_LIST, flip=flip, scaling=scaling).create_train_reader(batch_size) - return reader - - -def test(): - """ - Cityscape validation set reader. - It returns a reader, in which each result is a sample. - - :return: Training reader. - :rtype: callable - """ - reader = DataGenerater(TEST_LIST).create_test_reader() - return reader - - -def infer(image_list=TEST_LIST): - """ - Infer set reader. - It returns a reader, in which each result is a sample. - - :param image_list: The image list file in which each line is a path of image to be infered. - :type batch_size: str - :return: Infer reader. 
- :rtype: callable - """ - reader = DataGenerater(image_list).create_test_reader() diff --git a/PaddleCV/icnet/data/cityscape/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png b/PaddleCV/icnet/data/cityscape/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png deleted file mode 100644 index ea126e4fb7f8a7e4dbfc4aeb20e7fcdf934e249e..0000000000000000000000000000000000000000 Binary files a/PaddleCV/icnet/data/cityscape/gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png and /dev/null differ diff --git a/PaddleCV/icnet/data/cityscape/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png b/PaddleCV/icnet/data/cityscape/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png deleted file mode 100644 index 3994dd3da339c1fce21e48318e5866772dfb9451..0000000000000000000000000000000000000000 Binary files a/PaddleCV/icnet/data/cityscape/leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png and /dev/null differ diff --git a/PaddleCV/icnet/data/cityscape/train.list b/PaddleCV/icnet/data/cityscape/train.list deleted file mode 100644 index 71772de04014c2d1c9f735fa0238a53800af2673..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/data/cityscape/train.list +++ /dev/null @@ -1 +0,0 @@ -leftImg8bit/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png gtFine/train/stuttgart/stuttgart_000021_000019_gtFine_labelTrainIds.png diff --git a/PaddleCV/icnet/eval.py b/PaddleCV/icnet/eval.py deleted file mode 100644 index a21ceb1bb6ce9eef1f1fd53a04d0077813f9686a..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/eval.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Evaluator for ICNet model.""" -import paddle.fluid as fluid -import numpy as np -from utils import add_arguments, print_arguments, get_feeder_data, check_gpu -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter -from icnet import icnet -import cityscape -import argparse -import functools -import sys -import os - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('model_path', str, None, "Model path.") -add_arg('use_gpu', bool, True, "Whether use GPU to test.") -# yapf: enable - - -def cal_mean_iou(wrong, correct): - sum = wrong + correct - true_num = (sum != 0).sum() - for i in range(len(sum)): - if sum[i] == 0: - sum[i] = 1 - return (correct.astype("float64") / sum).sum() / true_num - - -def create_iou(predict, label, mask, num_classes, image_shape): - predict = fluid.layers.resize_bilinear(predict, out_shape=image_shape[1:3]) - predict = fluid.layers.transpose(predict, perm=[0, 2, 3, 1]) - predict = fluid.layers.reshape(predict, shape=[-1, num_classes]) - label = fluid.layers.reshape(label, shape=[-1, 1]) - _, predict = fluid.layers.topk(predict, k=1) - predict = fluid.layers.cast(predict, dtype="float32") - predict = fluid.layers.gather(predict, mask) - label = fluid.layers.gather(label, mask) - label = fluid.layers.cast(label, dtype="int32") - predict = fluid.layers.cast(predict, dtype="int32") - iou, out_w, out_r = fluid.layers.mean_iou(predict, label, num_classes) - return iou, out_w, out_r - - -def eval(args): - data_shape = 
cityscape.test_data_shape() - num_classes = cityscape.num_classes() - # define network - images = fluid.layers.data(name='image', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int32') - mask = fluid.layers.data(name='mask', shape=[-1], dtype='int32') - - _, _, sub124_out = icnet(images, num_classes, - np.array(data_shape[1:]).astype("float32")) - iou, out_w, out_r = create_iou(sub124_out, label, mask, num_classes, - data_shape) - inference_program = fluid.default_main_program().clone(for_test=True) - # prepare environment - place = fluid.CPUPlace() - if args.use_gpu: - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - assert os.path.exists(args.model_path) - fluid.io.load_params(exe, args.model_path) - print("loaded model from: %s" % args.model_path) - sys.stdout.flush() - - fetch_vars = [iou, out_w, out_r] - out_wrong = np.zeros([num_classes]).astype("int64") - out_right = np.zeros([num_classes]).astype("int64") - count = 0 - test_reader = cityscape.test() - for data in test_reader(): - count += 1 - result = exe.run(inference_program, - feed=get_feeder_data( - data, place, for_test=True), - fetch_list=fetch_vars) - out_wrong += result[1] - out_right += result[2] - sys.stdout.flush() - iou = cal_mean_iou(out_wrong, out_right) - print("\nmean iou: %.3f" % iou) - print("kpis test_acc %f" % iou) - - -def main(): - args = parser.parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - eval(args) - - -if __name__ == "__main__": - main() diff --git a/PaddleCV/icnet/icnet.py b/PaddleCV/icnet/icnet.py deleted file mode 100644 index c87a9785c027d4d5a298313bbbe7d7c4813a2a5e..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/icnet.py +++ /dev/null @@ -1,326 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -################################################################# -# -# Based on -#--------------------------------------------------------------- -# https://github.com/hszhao/ICNet -# Written by hszhao@cse.cuhk.edu.hk -#--------------------------------------------------------------- - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle.fluid as fluid -import numpy as np -import sys - - -def conv(input, - k_h, - k_w, - c_o, - s_h, - s_w, - relu=False, - padding="VALID", - biased=False, - name=None): - act = None - tmp = input - if relu: - act = "relu" - if padding == "SAME": - padding_h = max(k_h - s_h, 0) - padding_w = max(k_w - s_w, 0) - padding_top = padding_h // 2 - padding_left = padding_w // 2 - padding_bottom = padding_h - padding_top - padding_right = padding_w - padding_left - padding = [ - 0, 0, 0, 0, padding_top, padding_bottom, padding_left, padding_right - ] - tmp = fluid.layers.pad(tmp, padding) - tmp = fluid.layers.conv2d( - tmp, - num_filters=c_o, - filter_size=[k_h, k_w], - stride=[s_h, s_w], - groups=1, - act=act, - bias_attr=biased, - use_cudnn=False, - name=name) - return tmp - - -def atrous_conv(input, - k_h, - k_w, - c_o, - dilation, - relu=False, - padding="VALID", - biased=False, - name=None): - act = None - if relu: - act = "relu" - tmp = input - if padding == "SAME": - padding_h = max(k_h - s_h, 0) - padding_w = max(k_w - s_w, 
0) - padding_top = padding_h // 2 - padding_left = padding_w // 2 - padding_bottom = padding_h - padding_top - padding_right = padding_w - padding_left - padding = [ - 0, 0, 0, 0, padding_top, padding_bottom, padding_left, padding_right - ] - tmp = fluid.layers.pad(tmp, padding) - - tmp = fluid.layers.conv2d( - input, - num_filters=c_o, - filter_size=[k_h, k_w], - dilation=dilation, - groups=1, - act=act, - bias_attr=biased, - use_cudnn=False, - name=name) - return tmp - - -def zero_padding(input, padding): - return fluid.layers.pad(input, - [0, 0, 0, 0, padding, padding, padding, padding]) - - -def bn(input, relu=False, name=None, is_test=False): - act = None - if relu: - act = 'relu' - name = input.name.split(".")[0] + "_bn" - tmp = fluid.layers.batch_norm( - input, act=act, momentum=0.95, epsilon=1e-5, name=name) - return tmp - - -def avg_pool(input, k_h, k_w, s_h, s_w, name=None, padding=0): - temp = fluid.layers.pool2d( - input, - pool_size=[k_h, k_w], - pool_type="avg", - pool_stride=[s_h, s_w], - pool_padding=padding, - name=name) - return temp - - -def max_pool(input, k_h, k_w, s_h, s_w, name=None, padding=0): - temp = fluid.layers.pool2d( - input, - pool_size=[k_h, k_w], - pool_type="max", - pool_stride=[s_h, s_w], - pool_padding=padding, - name=name) - return temp - - -def interp(input, out_shape): - out_shape = list(out_shape.astype("int32")) - return fluid.layers.resize_bilinear(input, out_shape=out_shape) - - -def dilation_convs(input): - tmp = res_block(input, filter_num=256, padding=1, name="conv3_2") - tmp = res_block(tmp, filter_num=256, padding=1, name="conv3_3") - tmp = res_block(tmp, filter_num=256, padding=1, name="conv3_4") - - tmp = proj_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_1") - tmp = res_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_2") - tmp = res_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_3") - tmp = res_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_4") - tmp = 
res_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_5") - tmp = res_block(tmp, filter_num=512, padding=2, dilation=2, name="conv4_6") - - tmp = proj_block( - tmp, filter_num=1024, padding=4, dilation=4, name="conv5_1") - tmp = res_block(tmp, filter_num=1024, padding=4, dilation=4, name="conv5_2") - tmp = res_block(tmp, filter_num=1024, padding=4, dilation=4, name="conv5_3") - return tmp - - -def pyramis_pooling(input, input_shape): - shape = np.ceil(input_shape // 32).astype("int32") - h, w = shape - pool1 = avg_pool(input, h, w, h, w) - pool1_interp = interp(pool1, shape) - pool2 = avg_pool(input, h // 2, w // 2, h // 2, w // 2) - pool2_interp = interp(pool2, shape) - pool3 = avg_pool(input, h // 3, w // 3, h // 3, w // 3) - pool3_interp = interp(pool3, shape) - pool4 = avg_pool(input, h // 4, w // 4, h // 4, w // 4) - pool4_interp = interp(pool4, shape) - conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp - return conv5_3_sum - - -def shared_convs(image): - tmp = conv(image, 3, 3, 32, 2, 2, padding='SAME', name="conv1_1_3_3_s2") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 3, 3, 32, 1, 1, padding='SAME', name="conv1_2_3_3") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 3, 3, 64, 1, 1, padding='SAME', name="conv1_3_3_3") - tmp = bn(tmp, relu=True) - tmp = max_pool(tmp, 3, 3, 2, 2, padding=[1, 1]) - - tmp = proj_block(tmp, filter_num=128, padding=0, name="conv2_1") - tmp = res_block(tmp, filter_num=128, padding=1, name="conv2_2") - tmp = res_block(tmp, filter_num=128, padding=1, name="conv2_3") - tmp = proj_block(tmp, filter_num=256, padding=1, stride=2, name="conv3_1") - return tmp - - -def res_block(input, filter_num, padding=0, dilation=None, name=None): - tmp = conv(input, 1, 1, filter_num // 4, 1, 1, name=name + "_1_1_reduce") - tmp = bn(tmp, relu=True) - tmp = zero_padding(tmp, padding=padding) - if dilation is None: - tmp = conv(tmp, 3, 3, filter_num // 4, 1, 1, name=name + "_3_3") - else: - tmp = atrous_conv( - tmp, 
3, 3, filter_num // 4, dilation, name=name + "_3_3") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase") - tmp = bn(tmp, relu=False) - tmp = input + tmp - tmp = fluid.layers.relu(tmp) - return tmp - - -def proj_block(input, filter_num, padding=0, dilation=None, stride=1, - name=None): - proj = conv( - input, 1, 1, filter_num, stride, stride, name=name + "_1_1_proj") - proj_bn = bn(proj, relu=False) - - tmp = conv( - input, 1, 1, filter_num // 4, stride, stride, name=name + "_1_1_reduce") - tmp = bn(tmp, relu=True) - - tmp = zero_padding(tmp, padding=padding) - if padding == 0: - padding = 'SAME' - else: - padding = 'VALID' - if dilation is None: - tmp = conv( - tmp, - 3, - 3, - filter_num // 4, - 1, - 1, - padding=padding, - name=name + "_3_3") - else: - tmp = atrous_conv( - tmp, - 3, - 3, - filter_num // 4, - dilation, - padding=padding, - name=name + "_3_3") - - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase") - tmp = bn(tmp, relu=False) - tmp = proj_bn + tmp - tmp = fluid.layers.relu(tmp) - return tmp - - -def sub_net_4(input, input_shape): - tmp = interp(input, out_shape=(input_shape // 32)) - tmp = dilation_convs(tmp) - tmp = pyramis_pooling(tmp, input_shape) - tmp = conv(tmp, 1, 1, 256, 1, 1, name="conv5_4_k1") - tmp = bn(tmp, relu=True) - tmp = interp(tmp, out_shape=np.ceil(input_shape / 16)) - return tmp - - -def sub_net_2(input): - tmp = conv(input, 1, 1, 128, 1, 1, name="conv3_1_sub2_proj") - tmp = bn(tmp, relu=False) - return tmp - - -def sub_net_1(input): - tmp = conv(input, 3, 3, 32, 2, 2, padding='SAME', name="conv1_sub1") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 3, 3, 32, 2, 2, padding='SAME', name="conv2_sub1") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 3, 3, 64, 2, 2, padding='SAME', name="conv3_sub1") - tmp = bn(tmp, relu=True) - tmp = conv(tmp, 1, 1, 128, 1, 1, name="conv3_sub1_proj") - tmp = bn(tmp, relu=False) - return tmp - - -def CCF24(sub2_out, 
sub4_out, input_shape): - tmp = zero_padding(sub4_out, padding=2) - tmp = atrous_conv(tmp, 3, 3, 128, 2, name="conv_sub4") - tmp = bn(tmp, relu=False) - tmp = tmp + sub2_out - tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, input_shape // 8) - return tmp - - -def CCF124(sub1_out, sub24_out, input_shape): - tmp = zero_padding(sub24_out, padding=2) - tmp = atrous_conv(tmp, 3, 3, 128, 2, name="conv_sub2") - tmp = bn(tmp, relu=False) - tmp = tmp + sub1_out - tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, input_shape // 4) - return tmp - - -def icnet(data, num_classes, input_shape): - image_sub1 = data - image_sub2 = interp(data, out_shape=input_shape * 0.5) - - s_convs = shared_convs(image_sub2) - sub4_out = sub_net_4(s_convs, input_shape) - sub2_out = sub_net_2(s_convs) - sub1_out = sub_net_1(image_sub1) - - sub24_out = CCF24(sub2_out, sub4_out, input_shape) - sub124_out = CCF124(sub1_out, sub24_out, input_shape) - - conv6_cls = conv( - sub124_out, 1, 1, num_classes, 1, 1, biased=True, name="conv6_cls") - sub4_out = conv( - sub4_out, 1, 1, num_classes, 1, 1, biased=True, name="sub4_out") - sub24_out = conv( - sub24_out, 1, 1, num_classes, 1, 1, biased=True, name="sub24_out") - - return sub4_out, sub24_out, conv6_cls diff --git a/PaddleCV/icnet/images/icnet.png b/PaddleCV/icnet/images/icnet.png deleted file mode 100644 index f261bb14a85eceac7cd5df282ebc43021b7760d9..0000000000000000000000000000000000000000 Binary files a/PaddleCV/icnet/images/icnet.png and /dev/null differ diff --git a/PaddleCV/icnet/images/result.png b/PaddleCV/icnet/images/result.png deleted file mode 100644 index b3b0b52ade05943b4a1d741fa4f3a947e8ac28ae..0000000000000000000000000000000000000000 Binary files a/PaddleCV/icnet/images/result.png and /dev/null differ diff --git a/PaddleCV/icnet/images/train_loss.png b/PaddleCV/icnet/images/train_loss.png deleted file mode 100644 index 15011073ae0bd55a9df853934f3329747ee9a426..0000000000000000000000000000000000000000 Binary files 
a/PaddleCV/icnet/images/train_loss.png and /dev/null differ diff --git a/PaddleCV/icnet/infer.py b/PaddleCV/icnet/infer.py deleted file mode 100644 index f1d7db512b99975135cbfd28c97899a5f8db3745..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/infer.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Infer for ICNet model.""" -from __future__ import print_function -import cityscape -import argparse -import functools -import sys -import os -import cv2 - -import paddle.fluid as fluid -import paddle -from icnet import icnet -from utils import add_arguments, print_arguments, get_feeder_data, check_gpu -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter -import numpy as np - -IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('model_path', str, None, "Model path.") -add_arg('images_list', str, None, "List file with images to be infered.") -add_arg('images_path', str, None, "The images path.") -add_arg('out_path', str, "./output", "Output path.") -add_arg('use_gpu', bool, True, "Whether use GPU to test.") -# yapf: enable - -data_shape = [3, 1024, 2048] -num_classes = 19 - -label_colours = [ - [128, 64, 128], - [244, 35, 231], - [69, 69, 69] - # 0 = road, 1 = sidewalk, 2 = building - 
, - [102, 102, 156], - [190, 153, 153], - [153, 153, 153] - # 3 = wall, 4 = fence, 5 = pole - , - [250, 170, 29], - [219, 219, 0], - [106, 142, 35] - # 6 = traffic light, 7 = traffic sign, 8 = vegetation - , - [152, 250, 152], - [69, 129, 180], - [219, 19, 60] - # 9 = terrain, 10 = sky, 11 = person - , - [255, 0, 0], - [0, 0, 142], - [0, 0, 69] - # 12 = rider, 13 = car, 14 = truck - , - [0, 60, 100], - [0, 79, 100], - [0, 0, 230] - # 15 = bus, 16 = train, 17 = motocycle - , - [119, 10, 32] -] - -# 18 = bicycle - - -def color(input): - """ - Convert infered result to color image. - """ - result = [] - for i in input.flatten(): - result.append( - [label_colours[i][2], label_colours[i][1], label_colours[i][0]]) - result = np.array(result).reshape([input.shape[0], input.shape[1], 3]) - return result - - -def infer(args): - data_shape = cityscape.test_data_shape() - num_classes = cityscape.num_classes() - # define network - images = fluid.layers.data(name='image', shape=data_shape, dtype='float32') - _, _, sub124_out = icnet(images, num_classes, - np.array(data_shape[1:]).astype("float32")) - predict = fluid.layers.resize_bilinear( - sub124_out, out_shape=data_shape[1:3]) - predict = fluid.layers.transpose(predict, perm=[0, 2, 3, 1]) - predict = fluid.layers.reshape(predict, shape=[-1, num_classes]) - _, predict = fluid.layers.topk(predict, k=1) - predict = fluid.layers.reshape( - predict, - shape=[data_shape[1], data_shape[2], -1]) # batch_size should be 1 - inference_program = fluid.default_main_program().clone(for_test=True) - # prepare environment - place = fluid.CPUPlace() - if args.use_gpu: - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - assert os.path.exists(args.model_path) - fluid.io.load_params(exe, args.model_path) - print("loaded model from: %s" % args.model_path) - sys.stdout.flush() - - if not os.path.isdir(args.out_path): - os.makedirs(args.out_path) - - for line in open(args.images_list): - 
image_file = args.images_path + "/" + line.strip() - filename = os.path.basename(image_file) - image = paddle.dataset.image.load_image( - image_file, is_color=True).astype("float32") - image -= IMG_MEAN - img = paddle.dataset.image.to_chw(image)[np.newaxis, :] - image_t = fluid.LoDTensor() - image_t.set(img, place) - result = exe.run(inference_program, - feed={"image": image_t}, - fetch_list=[predict]) - cv2.imwrite(args.out_path + "/" + filename + "_result.png", - color(result[0])) - print("Saved images into: %s" % args.out_path) - - -def main(): - args = parser.parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - infer(args) - - -if __name__ == "__main__": - main() diff --git a/PaddleCV/icnet/train.py b/PaddleCV/icnet/train.py deleted file mode 100644 index ae616eee603c221b8bea475467a02ecf3785464a..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/train.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Trainer for ICNet model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from icnet import icnet -import cityscape -import argparse -import functools -import sys -import os -import time -import paddle.fluid as fluid -import numpy as np -from utils import add_arguments, print_arguments, get_feeder_data, check_gpu -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter - -if 'ce_mode' in os.environ: - np.random.seed(10) - fluid.default_startup_program().random_seed = 90 - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('batch_size', int, 16, "Minibatch size.") -add_arg('checkpoint_path', str, None, "Checkpoint svae path.") -add_arg('init_model', str, None, "Pretrain model path.") -add_arg('use_gpu', bool, True, "Whether use GPU to train.") -add_arg('random_mirror', bool, True, "Whether prepare by random mirror.") -add_arg('random_scaling', bool, True, "Whether prepare by random scaling.") -# yapf: enable - -LAMBDA1 = 0.16 -LAMBDA2 = 0.4 -LAMBDA3 = 1.0 -LEARNING_RATE = 0.003 -POWER = 0.9 -LOG_PERIOD = 1 -CHECKPOINT_PERIOD = 1000 -TOTAL_STEP = 60000 -if 'ce_mode' in os.environ: - TOTAL_STEP = 100 - -no_grad_set = [] - - -def create_loss(predict, label, mask, num_classes): - predict = fluid.layers.transpose(predict, perm=[0, 2, 3, 1]) - predict = fluid.layers.reshape(predict, shape=[-1, num_classes]) - label = fluid.layers.reshape(label, shape=[-1, 1]) - predict = fluid.layers.gather(predict, mask) - label = fluid.layers.gather(label, mask) - label = fluid.layers.cast(label, dtype="int64") - loss = fluid.layers.softmax_with_cross_entropy(predict, label) - no_grad_set.append(label.name) - return fluid.layers.reduce_mean(loss) - - -def poly_decay(): - global_step = _decay_step_counter() - decayed_lr = LEARNING_RATE * (fluid.layers.pow( - (1 - global_step / TOTAL_STEP), POWER)) - return 
decayed_lr - - -def train(args): - data_shape = cityscape.train_data_shape() - num_classes = cityscape.num_classes() - # define network - images = fluid.layers.data(name='image', shape=data_shape, dtype='float32') - label_sub1 = fluid.layers.data(name='label_sub1', shape=[1], dtype='int32') - label_sub2 = fluid.layers.data(name='label_sub2', shape=[1], dtype='int32') - label_sub4 = fluid.layers.data(name='label_sub4', shape=[1], dtype='int32') - mask_sub1 = fluid.layers.data(name='mask_sub1', shape=[-1], dtype='int32') - mask_sub2 = fluid.layers.data(name='mask_sub2', shape=[-1], dtype='int32') - mask_sub4 = fluid.layers.data(name='mask_sub4', shape=[-1], dtype='int32') - - sub4_out, sub24_out, sub124_out = icnet( - images, num_classes, np.array(data_shape[1:]).astype("float32")) - loss_sub4 = create_loss(sub4_out, label_sub4, mask_sub4, num_classes) - loss_sub24 = create_loss(sub24_out, label_sub2, mask_sub2, num_classes) - loss_sub124 = create_loss(sub124_out, label_sub1, mask_sub1, num_classes) - reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 - - regularizer = fluid.regularizer.L2Decay(0.0001) - optimizer = fluid.optimizer.Momentum( - learning_rate=poly_decay(), momentum=0.9, regularization=regularizer) - _, params_grads = optimizer.minimize(reduced_loss, no_grad_set=no_grad_set) - - # prepare environment - place = fluid.CPUPlace() - if args.use_gpu: - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - - exe.run(fluid.default_startup_program()) - - if args.init_model is not None: - print("load model from: %s" % args.init_model) - - def if_exist(var): - return os.path.exists(os.path.join(args.init_model, var.name)) - - fluid.io.load_vars(exe, args.init_model, predicate=if_exist) - - iter_id = 0 - t_loss = 0. - sub4_loss = 0. - sub24_loss = 0. - sub124_loss = 0. 
- train_reader = cityscape.train( - args.batch_size, flip=args.random_mirror, scaling=args.random_scaling) - start_time = time.time() - while True: - # train a pass - for data in train_reader(): - if iter_id > TOTAL_STEP: - end_time = time.time() - print("kpis train_duration %f" % (end_time - start_time)) - return - iter_id += 1 - results = exe.run( - feed=get_feeder_data(data, place), - fetch_list=[reduced_loss, loss_sub4, loss_sub24, loss_sub124]) - t_loss += results[0] - sub4_loss += results[1] - sub24_loss += results[2] - sub124_loss += results[3] - # training log - if iter_id % LOG_PERIOD == 0: - print( - "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" - % (iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD, - sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD)) - print("kpis train_cost %f" % (t_loss / LOG_PERIOD)) - - t_loss = 0. - sub4_loss = 0. - sub24_loss = 0. - sub124_loss = 0. - sys.stdout.flush() - - if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None: - dir_name = args.checkpoint_path + "/" + str(iter_id) - fluid.io.save_persistables(exe, dirname=dir_name) - print("Saved checkpoint: %s" % (dir_name)) - - -def main(): - args = parser.parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - train(args) - - -if __name__ == "__main__": - main() diff --git a/PaddleCV/icnet/utils.py b/PaddleCV/icnet/utils.py deleted file mode 100644 index 34e28602dbc1d8149f7ff70638b85f72cf4b8cbc..0000000000000000000000000000000000000000 --- a/PaddleCV/icnet/utils.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Contains common utility functions.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import distutils.util -import numpy as np -import six -import paddle.fluid as fluid - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int32") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def get_feeder_data(data, place, for_test=False): - feed_dict = {} - image_t = fluid.LoDTensor() - image_t.set(data[0], place) - feed_dict["image"] = image_t - - if not for_test: - labels_sub1_t = fluid.LoDTensor() - labels_sub2_t = fluid.LoDTensor() - labels_sub4_t = fluid.LoDTensor() - mask_sub1_t = fluid.LoDTensor() - mask_sub2_t = fluid.LoDTensor() - mask_sub4_t = fluid.LoDTensor() - - labels_sub1_t.set(data[1], place) - labels_sub2_t.set(data[3], place) - mask_sub1_t.set(data[2], place) - mask_sub2_t.set(data[4], place) - labels_sub4_t.set(data[5], place) - mask_sub4_t.set(data[6], place) - feed_dict["label_sub1"] = labels_sub1_t - feed_dict["label_sub2"] = labels_sub2_t - feed_dict["mask_sub1"] = mask_sub1_t - feed_dict["mask_sub2"] = mask_sub2_t - feed_dict["label_sub4"] = labels_sub4_t - feed_dict["mask_sub4"] = mask_sub4_t - else: - label_t = fluid.LoDTensor() - mask_t = fluid.LoDTensor() - label_t.set(data[1], place) - mask_t.set(data[2], place) - feed_dict["label"] = label_t - feed_dict["mask"] = mask_t - - return feed_dict - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. 
- """ - err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set use_gpu as false in config file to run " \ - "model on CPU" - - try: - if use_gpu and not fluid.is_compiled_with_cuda(): - logger.error(err) - sys.exit(1) - except Exception as e: - pass diff --git a/PaddleCV/rcnn/.gitignore b/PaddleCV/rcnn/.gitignore deleted file mode 100644 index 46bccb94f65af01009f2c65b4c1a495d6cc0eaf3..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -output/ -*.swp -*.log -log* -output* diff --git a/PaddleCV/rcnn/.run_ce.sh b/PaddleCV/rcnn/.run_ce.sh deleted file mode 100755 index 8062ee1cef875897fccb4220a6285a522acef961..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/.run_ce.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -export MKL_NUM_THREADS=1 -export OMP_NUM_THREADS=1 - - -cudaid=${face_detection:=0} # use 0-th card as default -export CUDA_VISIBLE_DEVICES=$cudaid - -FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=500 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn --learning_rate=0.00125 | python _ce.py - - -cudaid=${face_detection_m:=0,1,2,3} # use 0,1,2,3 card as default -export CUDA_VISIBLE_DEVICES=$cudaid - -FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=500 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn --learning_rate=0.005 | python _ce.py - diff --git a/PaddleCV/rcnn/README.md b/PaddleCV/rcnn/README.md deleted file mode 100644 index 803e10446c639d8987f3ef30b39e61db95038f0f..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/README.md +++ /dev/null @@ -1,219 +0,0 @@ -**This model has been moved to [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), which includes more detection models.** - -# RCNN Objective Detection - ---- 
-## Table of Contents - -- [Installation](#installation) -- [Introduction](#introduction) -- [Data preparation](#data-preparation) -- [Training](#training) -- [Evaluation](#evaluation) -- [Inference and Visualization](#inference-and-visualization) - -## Installation - -Running sample code in this directory requires PaddelPaddle Fluid v.1.3.0 and later. If the PaddlePaddle on your device is lower than this version, please follow the instructions in [installation document](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/install/index_en.html) and make an update. - -We also recommend users to take a look at:  -[MaskRCNN](https://aistudio.baidu.com/aistudio/projectDetail/122273) -[Faster RCNN](https://aistudio.baidu.com/aistudio/projectDetail/122275) - -## Introduction - -Region Convolutional Neural Network (RCNN) models are two stages detector. According to proposals and feature extraction, obtain class and more precise proposals. -Now RCNN model contains two typical models: Faster RCNN and Mask RCNN. - -[Faster RCNN](https://arxiv.org/abs/1506.01497), The total framework of network can be divided into four parts: - -1. Base conv layer. As a CNN objective dection, Faster RCNN extract feature maps using a basic convolutional network. The feature maps then can be shared by RPN and fc layers. This sampel uses [ResNet-50](https://arxiv.org/abs/1512.03385) as base conv layer. -2. Region Proposal Network (RPN). RPN generates proposals for detection。This block generates anchors by a set of size and ratio and classifies anchors into fore-ground and back-ground by softmax. Then refine anchors to obtain more precise proposals using box regression. -3. RoI Align. This layer takes feature maps and proposals as input. The proposals are mapped to feature maps and pooled to the same size. The output are sent to fc layers for classification and regression. RoIPool and RoIAlign are used separately to this layer and it can be set in roi\_func in config.py. -4. 
Detection layer. Using the output of roi pooling to compute the class and locatoin of each proposal in two fc layers. - -[Mask RCNN](https://arxiv.org/abs/1703.06870) is a classical instance segmentation model and an extension of Faster RCNN - -Mask RCNN is a two stage model as well. At the first stage, it generates proposals from input images. At the second stage, it obtains class result, bbox and mask which is the result from segmentation branch on original Faster RCNN model. It decouples the relation between mask and classification. - -## Data preparation - -Train the model on [MS-COCO dataset](http://cocodataset.org/#download), download dataset as below: - -```bash -python dataset/coco/download.py -``` - -The data catalog structure is as follows: - - ``` - data/coco/ - ├── annotations - │   ├── instances_train2014.json - │   ├── instances_train2017.json - │   ├── instances_val2014.json - │   ├── instances_val2017.json - | ... - ├── train2017 - │   ├── 000000000009.jpg - │   ├── 000000580008.jpg - | ... - ├── val2017 - │   ├── 000000000139.jpg - │   ├── 000000000285.jpg - | ... - ``` - -## Training - -**download the pre-trained model:** This sample provides Resnet-50 pre-trained model which is converted from Caffe. The model fuses the parameters in batch normalization layer. One can download pre-trained model as: - - sh ./pretrained/download.sh - -**NOTE:** Windows users can download weights from links in `./pretrained/download.sh`. - -Set `pretrained_model` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well. -Please make sure that pretrained_model is downloaded and loaded correctly, otherwise, the loss may be NAN during training. - -**Install the [cocoapi](https://github.com/cocodataset/cocoapi):** - -To train the model, [cocoapi](https://github.com/cocodataset/cocoapi) is needed. 
Install the cocoapi: - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # if cython is not installed - pip install Cython - # Install into global site-packages - make install - # Alternatively, if you do not have permissions or prefer - # not to install the COCO API into global site-packages - python2 setup.py install --user - -After data preparation, one can start the training step by: - -- Faster RCNN - - ``` - python train.py \ - --model_save_dir=output/ \ - --pretrained_model=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --MASK_ON=False - ``` - -- Mask RCNN - - ``` - python train.py \ - --model_save_dir=output/ \ - --pretrained_model=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --MASK_ON=True - ``` - - - Set ```export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7``` to specifiy 8 GPU to train. - - Set ```MASK_ON``` to choose Faster RCNN or Mask RCNN model. - - Set ```parallel``` to False to replace [fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor) to [fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor) when running the program in the Windows & GPU environment. - - For more help on arguments: - - python train.py --help - -**data reader introduction:** - -* Data reader is defined in `reader.py`. -* Scaling the short side of all images to `scales`. If the long side is larger than `max_size`, then scaling the long side to `max_size`. -* In training stage, images are horizontally flipped. -* Images in the same batch can be padding to the same size. - -**model configuration:** - -* Use RoIAlign and RoIPool separately. -* NMS threshold=0.7. During training, pre\_nms=12000, post\_nms=2000; during test, pre\_nms=6000, post\_nms=1000. -* In generating proposal lables, fg\_fraction=0.25, fg\_thresh=0.5, bg\_thresh_hi=0.5, bg\_thresh\_lo=0.0. 
-* In rpn target assignment, rpn\_fg\_fraction=0.5, rpn\_positive\_overlap=0.7, rpn\_negative\_overlap=0.3. - -**training strategy:** - -* Use momentum optimizer with momentum=0.9. -* Weight decay is 0.0001. -* In first 500 iteration, the learning rate increases linearly from 0.00333 to 0.01. Then lr is decayed at 120000, 160000 iteration with multiplier 0.1, 0.01. The maximum iteration is 180000. Also, we released a 2x model which has 360000 iterations and lr is decayed at 240000, 320000. These configuration can be set by max_iter and lr_steps in config.py. -* Set the learning rate of bias to two times as global lr in non basic convolutional layers. -* In basic convolutional layers, parameters of affine layers and res body do not update. - -## Evaluation - -Evaluation is to evaluate the performance of a trained model. This sample provides `eval_coco_map.py` which uses a COCO-specific mAP metric defined by [COCO committee](http://cocodataset.org/#detections-eval). - -`eval_coco_map.py` is the main executor for evalution, one can start evalution step by: - -- Faster RCNN - - ``` - python eval_coco_map.py \ - --dataset=coco2017 \ - --pretrained_model=${path_to_trained_model} \ - --MASK_ON=False - ``` - -- Mask RCNN - - ``` - python eval_coco_map.py \ - --dataset=coco2017 \ - --pretrained_model=${path_to_trainde_model} \ - --MASK_ON=True - ``` - - - Set ```--pretrained_model=${path_to_trained_model}``` to specifiy the trained model, not the initialized model. - - Set ```export CUDA_VISIBLE_DEVICES=0``` to specifiy one GPU to eval. - - Set ```MASK_ON``` to choose Faster RCNN or Mask RCNN model. 
- -Evalutaion result is shown as below: - -Faster RCNN: - -| Model | RoI function | Batch size | Max iteration | mAP | -| :--------------- | :--------: | :------------: | :------------------: |------: | -| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | -| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 | -| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 | -| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 | - -* Fluid RoIPool minibatch padding: Use RoIPool. Images in one batch padding to the same size. This method is same as detectron. -* Fluid RoIPool no padding: Images without padding. -* Fluid RoIAlign no padding: Images without padding. -* Fluid RoIAlign no padding 2x: Images without padding, train for 360000 iterations, learning rate is decayed at 240000, 320000. - -Mask RCNN: - -| Model | Batch size | Max iteration | box mAP | mask mAP | -| :--------------- | :--------: | :------------: | :--------: |------: | -| [Fluid mask no padding](https://paddlemodels.bj.bcebos.com/faster_rcnn/Fluid_mask_no_padding.tar.gz) | 8 | 180000 | 0.359 | 0.314 | - -* Fluid mask no padding: Use RoIAlign. Images without padding. - -## Inference and Visualization - -Inference is used to get prediction score or image features based on trained models. `infer.py` is the main executor for inference, one can start infer step by: - -``` -python infer.py \ - --pretrained_model=${path_to_trained_model} \ - --image_path=dataset/coco/val2017/000000000139.jpg \ - --draw_threshold=0.6 -``` - -Please set the model path and image path correctly. 
GPU device is used by default, you can set `--use_gpu=False` to switch to CPU device. And you can set `draw_threshold` to tune score threshold to control the number of output detection boxes. - -Visualization of infer result is shown as below: -

- -
-Faster RCNN Visualization Examples -

- -

- -
-Mask RCNN Visualization Examples -

diff --git a/PaddleCV/rcnn/README_cn.md b/PaddleCV/rcnn/README_cn.md deleted file mode 100644 index bc232fe273b10b082a54d7c2c69b53726c24cb98..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/README_cn.md +++ /dev/null @@ -1,217 +0,0 @@ -**该项目已被迁移至[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), 这个项目包含了更多的检测模型。** - -# RCNN 系列目标检测 - ---- -## 内容 - -- [安装](#安装) -- [简介](#简介) -- [数据准备](#数据准备) -- [模型训练](#模型训练) -- [模型评估](#模型评估) -- [模型推断及可视化](#模型推断及可视化) - -## 安装 - -在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.3.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/)中的说明来更新PaddlePaddle。 - -同时推荐用户参考: -[MaskRCNN](https://aistudio.baidu.com/aistudio/projectDetail/122273) -[Faster RCNN](https://aistudio.baidu.com/aistudio/projectDetail/122275) - -## 简介 -区域卷积神经网络(RCNN)系列模型为两阶段目标检测器。通过对图像生成候选区域,提取特征,判别特征类别并修正候选框位置。 -RCNN系列目前包含两个代表模型:Faster RCNN,Mask RCNN - -[Faster RCNN](https://arxiv.org/abs/1506.01497) 整体网络可以分为4个主要内容: - -1. 基础卷积层。作为一种卷积神经网络目标检测方法,Faster RCNN首先使用一组基础的卷积网络提取图像的特征图。特征图被后续RPN层和全连接层共享。本示例采用[ResNet-50](https://arxiv.org/abs/1512.03385)作为基础卷积层。 -2. 区域生成网络(RPN)。RPN网络用于生成候选区域(proposals)。该层通过一组固定的尺寸和比例得到一组锚点(anchors), 通过softmax判断锚点属于前景或者背景,再利用区域回归修正锚点从而获得精确的候选区域。 -3. RoI Align。该层收集输入的特征图和候选区域,将候选区域映射到特征图中并池化为统一大小的区域特征图,送入全连接层判定目标类别, 该层可选用RoIPool和RoIAlign两种方式,在config.py中设置roi\_func。 -4. 检测层。利用区域特征图计算候选区域的类别,同时再次通过区域回归获得检测框最终的精确位置。 - -[Mask RCNN](https://arxiv.org/abs/1703.06870) 扩展自Faster RCNN,是经典的实例分割模型。 - -Mask RCNN同样为两阶段框架,第一阶段扫描图像生成候选框;第二阶段根据候选框得到分类结果,边界框,同时在原有Faster RCNN模型基础上添加分割分支,得到掩码结果,实现了掩码和类别预测关系的解藕。 - - -## 数据准备 - -在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - -```bash -python dataset/coco/download.py -``` - -数据目录结构如下: - -``` -data/coco/ -├── annotations -│   ├── instances_train2014.json -│   ├── instances_train2017.json -│   ├── instances_val2014.json -│   ├── instances_val2017.json -| ... -├── train2017 -│   ├── 000000000009.jpg -│   ├── 000000580008.jpg -| ... 
-├── val2017 -│   ├── 000000000139.jpg -│   ├── 000000000285.jpg -| ... - -``` - -## 模型训练 - -**下载预训练模型:** 本示例提供Resnet-50预训练模型,该模性转换自Caffe,并对批标准化层(Batch Normalization Layer)进行参数融合。采用如下命令下载预训练模型: - - sh ./pretrained/download.sh - -**注意:** Windows用户可通过`./pretrained/download.sh`中的链接直接下载和解压。 - -通过初始化`pretrained_model` 加载预训练模型。同时在参数微调时也采用该设置加载已训练模型。 -请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 - -**安装[cocoapi](https://github.com/cocodataset/cocoapi):** - -训练前需要首先下载[cocoapi](https://github.com/cocodataset/cocoapi): - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # if cython is not installed - pip install Cython - # Install into global site-packages - make install - # Alternatively, if you do not have permissions or prefer - # not to install the COCO API into global site-packages - python2 setup.py install --user - -数据准备完毕后,可以通过如下的方式启动训练: - -- Faster RCNN - - ``` - python train.py \ - --model_save_dir=output/ \ - --pretrained_model=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --MASK_ON=False - ``` - -- Mask RCNN - - ``` - python train.py \ - --model_save_dir=output/ \ - --pretrained_model=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --MASK_ON=True - ``` - - - 通过设置export CUDA\_VISIBLE\_DEVICES=0,1,2,3,4,5,6,7指定8卡GPU训练。 - - 通过设置```MASK_ON```选择Faster RCNN和Mask RCNN模型。 - - 使用Windows GPU环境的用户,需要设置```parallel```为False,将[fluid.ParallelExecutor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#parallelexecutor)替换为[fluid.Executor](http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/fluid_cn.html#executor)。 - - 可选参数见: - - python train.py --help - -**数据读取器说明:** 数据读取器定义在reader.py中。所有图像将短边等比例缩放至`scales`,若长边大于`max_size`, 则再次将长边等比例缩放至`max_size`。在训练阶段,对图像采用水平翻转。支持将同一个batch内的图像padding为相同尺寸。 - -**模型设置:** - -* 分别使用RoIAlign和RoIPool两种方法。 -* 训练过程pre\_nms=12000, post\_nms=2000,测试过程pre\_nms=6000, post\_nms=1000。nms阈值为0.7。 -* 
RPN网络得到labels的过程中,fg\_fraction=0.25,fg\_thresh=0.5,bg\_thresh_hi=0.5,bg\_thresh\_lo=0.0 -* RPN选择anchor时,rpn\_fg\_fraction=0.5,rpn\_positive\_overlap=0.7,rpn\_negative\_overlap=0.3 - - -**训练策略:** - -* 采用momentum优化算法训练,momentum=0.9。 -* 权重衰减系数为0.0001,前500轮学习率从0.00333线性增加至0.01。在120000,160000轮时使用0.1,0.01乘子进行学习率衰减,最大训练180000轮。同时我们也提供了2x模型,该模型采用更多的迭代轮数进行训练,训练360000轮,学习率在240000,320000轮衰减,其他参数不变,训练最大轮数和学习率策略可以在config.py中对max_iter和lr_steps进行设置。 -* 非基础卷积层卷积bias学习率为整体学习率2倍。 -* 基础卷积层中,affine_layers参数不更新,res2层参数不更新。 - -## 模型评估 - -模型评估是指对训练完毕的模型评估各类性能指标。本示例采用[COCO官方评估](http://cocodataset.org/#detections-eval) - -`eval_coco_map.py`是评估模块的主要执行程序,调用示例如下: - -- Faster RCNN - - ``` - python eval_coco_map.py \ - --dataset=coco2017 \ - --pretrained_model=${path_to_trained_model} \ - --MASK_ON=False - ``` - -- Mask RCNN - - ``` - python eval_coco_map.py \ - --dataset=coco2017 \ - --pretrained_model=${path_to_trained_model} \ - --MASK_ON=True - ``` - - - 通过设置`--pretrained_model=${path_to_trained_model}`指定训练好的模型,注意不是初始化的模型。 - - 通过设置`export CUDA\_VISIBLE\_DEVICES=0`指定单卡GPU评估。 - - 通过设置```MASK_ON```选择Faster RCNN和Mask RCNN模型。 - -下表为模型评估结果: - -Faster RCNN - -| 模型 | RoI处理方式 | 批量大小 | 迭代次数 | mAP | -| :--------------- | :--------: | :------------: | :------------------: |------: | -| [Fluid RoIPool minibatch padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_minibatch_padding.tar.gz) | RoIPool | 8 | 180000 | 0.316 | -| [Fluid RoIPool no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_pool_no_padding.tar.gz) | RoIPool | 8 | 180000 | 0.318 | -| [Fluid RoIAlign no padding](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding.tar.gz) | RoIAlign | 8 | 180000 | 0.348 | -| [Fluid RoIAlign no padding 2x](http://paddlemodels.bj.bcebos.com/faster_rcnn/model_align_no_padding_2x.tar.gz) | RoIAlign | 8 | 360000 | 0.367 | - - - -* Fluid RoIPool minibatch padding: 使用RoIPool,同一个batch内的图像填充为相同尺寸。该方法与detectron处理相同。 -* Fluid RoIPool no padding: 使用RoIPool,不对图像做填充处理。 -* Fluid 
RoIAlign no padding: 使用RoIAlign,不对图像做填充处理。 -* Fluid RoIAlign no padding 2x: 使用RoIAlign,不对图像做填充处理。训练360000轮,学习率在240000,320000轮衰减。 - -Mask RCNN: - -| 模型 | 批量大小 | 迭代次数 | box mAP | mask mAP | -| :--------------- | :--------: | :------------: | :--------: |------: | -| [Fluid mask no padding](https://paddlemodels.bj.bcebos.com/faster_rcnn/Fluid_mask_no_padding.tar.gz) | 8 | 180000 | 0.359 | 0.314 | - -* Fluid mask no padding: 使用RoIAlign,不对图像做填充处理 - -## 模型推断及可视化 - -模型推断可以获取图像中的物体及其对应的类别,`infer.py`是主要执行程序,调用示例如下: - -``` -python infer.py \ - --pretrained_model=${path_to_trained_model} \ - --image_path=dataset/coco/val2017/000000000139.jpg \ - --draw_threshold=0.6 -``` - -注意,请正确设置模型路径`${path_to_trained_model}`和预测图片路径。默认使用GPU设备,也可通过设置`--use_gpu=False`使用CPU设备。可通过设置`draw_threshold`调节得分阈值控制检测框的个数。 - -下图为模型可视化预测结果: -

- -
-Faster RCNN 预测可视化 -

- -

- -
-Mask RCNN 预测可视化 -

diff --git a/PaddleCV/rcnn/__init__.py b/PaddleCV/rcnn/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/rcnn/_ce.py b/PaddleCV/rcnn/_ce.py deleted file mode 100644 index e331d1bb7cccce5ac914dfa3417fe9090bd9cf99..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/_ce.py +++ /dev/null @@ -1,62 +0,0 @@ -# this file is only used for continuous evaluation test! - -import os -import sys -sys.path.append(os.environ['ceroot']) -from kpi import CostKpi -from kpi import DurationKpi - -each_pass_duration_card1_kpi = DurationKpi( - 'each_pass_duration_card1', 0.08, 0, actived=True) -train_loss_card1_kpi = CostKpi('train_loss_card1', 0.08, 0) -each_pass_duration_card4_kpi = DurationKpi( - 'each_pass_duration_card4', 0.08, 0, actived=True) -train_loss_card4_kpi = CostKpi('train_loss_card4', 0.08, 0) - -tracking_kpis = [ - each_pass_duration_card1_kpi, - train_loss_card1_kpi, - each_pass_duration_card4_kpi, - train_loss_card4_kpi, -] - - -def parse_log(log): - ''' - This method should be implemented by model developers. 
- - The suggestion: - - each line in the log should be key, value, for example: - - " - train_cost\t1.0 - test_cost\t1.0 - train_cost\t1.0 - train_cost\t1.0 - train_acc\t1.2 - " - ''' - for line in log.split('\n'): - fs = line.strip().split('\t') - print(fs) - if len(fs) == 3 and fs[0] == 'kpis': - kpi_name = fs[1] - kpi_value = float(fs[2]) - yield kpi_name, kpi_value - - -def log_to_ce(log): - kpi_tracker = {} - for kpi in tracking_kpis: - kpi_tracker[kpi.name] = kpi - - for (kpi_name, kpi_value) in parse_log(log): - print(kpi_name, kpi_value) - kpi_tracker[kpi_name].add_record(kpi_value) - kpi_tracker[kpi_name].persist() - - -if __name__ == '__main__': - log = sys.stdin.read() - log_to_ce(log) diff --git a/PaddleCV/rcnn/box_utils.py b/PaddleCV/rcnn/box_utils.py deleted file mode 100644 index bb3fe9c8f0cb261004578abba651ad7210518a22..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/box_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# Detectron -# Copyright (c) 2017-present, Facebook, Inc. 
-# Licensed under the Apache License, Version 2.0; -# Written by Ross Girshick -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np - - -def xywh_to_xyxy(xywh): - """Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format.""" - if isinstance(xywh, (list, tuple)): - # Single box given as a list of coordinates - assert len(xywh) == 4 - x1, y1 = xywh[0], xywh[1] - x2 = x1 + np.maximum(0., xywh[2] - 1.) - y2 = y1 + np.maximum(0., xywh[3] - 1.) - return (x1, y1, x2, y2) - elif isinstance(xywh, np.ndarray): - # Multiple boxes given as a 2D ndarray - return np.hstack( - (xywh[:, 0:2], xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1))) - else: - raise TypeError('Argument xywh must be a list, tuple, or numpy array.') - - -def xyxy_to_xywh(xyxy): - """Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format.""" - if isinstance(xyxy, (list, tuple)): - # Single box given as a list of coordinates - assert len(xyxy) == 4 - x1, y1 = xyxy[0], xyxy[1] - w = xyxy[2] - x1 + 1 - h = xyxy[3] - y1 + 1 - return (x1, y1, w, h) - elif isinstance(xyxy, np.ndarray): - # Multiple boxes given as a 2D ndarray - return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1)) - else: - raise TypeError('Argument xyxy must be a list, tuple, or numpy array.') - - -def clip_xyxy_to_image(x1, y1, x2, y2, height, width): - """Clip coordinates to an image with the given height and width.""" - x1 = np.minimum(width - 1., np.maximum(0., x1)) - y1 = np.minimum(height - 1., np.maximum(0., y1)) - x2 = np.minimum(width - 1., np.maximum(0., x2)) - y2 = np.minimum(height - 1., np.maximum(0., y2)) - return x1, y1, x2, y2 - - -def nms(dets, thresh): - """Apply classic DPM-style greedy NMS.""" - if dets.shape[0] == 0: - return [] - x1 = dets[:, 0] - y1 = dets[:, 1] - x2 = dets[:, 2] - y2 = dets[:, 3] - scores = dets[:, 4] - - areas = 
(x2 - x1 + 1) * (y2 - y1 + 1) - order = scores.argsort()[::-1] - - ndets = dets.shape[0] - suppressed = np.zeros((ndets), dtype=np.int) - - # nominal indices - # _i, _j - # sorted indices - # i, j - # temp variables for box i's (the box currently under consideration) - # ix1, iy1, ix2, iy2, iarea - - # variables for computing overlap with box j (lower scoring box) - # xx1, yy1, xx2, yy2 - # w, h - # inter, ovr - - for _i in range(ndets): - i = order[_i] - if suppressed[i] == 1: - continue - ix1 = x1[i] - iy1 = y1[i] - ix2 = x2[i] - iy2 = y2[i] - iarea = areas[i] - for _j in range(_i + 1, ndets): - j = order[_j] - if suppressed[j] == 1: - continue - xx1 = max(ix1, x1[j]) - yy1 = max(iy1, y1[j]) - xx2 = min(ix2, x2[j]) - yy2 = min(iy2, y2[j]) - w = max(0.0, xx2 - xx1 + 1) - h = max(0.0, yy2 - yy1 + 1) - inter = w * h - ovr = inter / (iarea + areas[j] - inter) - if ovr >= thresh: - suppressed[j] = 1 - - return np.where(suppressed == 0)[0] - - -def expand_boxes(boxes, scale): - """Expand an array of boxes by a given scale.""" - w_half = (boxes[:, 2] - boxes[:, 0]) * .5 - h_half = (boxes[:, 3] - boxes[:, 1]) * .5 - x_c = (boxes[:, 2] + boxes[:, 0]) * .5 - y_c = (boxes[:, 3] + boxes[:, 1]) * .5 - - w_half *= scale - h_half *= scale - - boxes_exp = np.zeros(boxes.shape) - boxes_exp[:, 0] = x_c - w_half - boxes_exp[:, 2] = x_c + w_half - boxes_exp[:, 1] = y_c - h_half - boxes_exp[:, 3] = y_c + h_half - - return boxes_exp diff --git a/PaddleCV/rcnn/colormap.py b/PaddleCV/rcnn/colormap.py deleted file mode 100644 index 8c2447794fc2e9841b30c2cdf11e8fc70d20d764..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/colormap.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# Detectron -# Copyright (c) 2017-present, Facebook, Inc. -# Licensed under the Apache License, Version 2.0; -# Written by Ross Girshick -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np - - -def colormap(rgb=False): - color_list = np.array([ - 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, - 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, - 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, - 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, - 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, - 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, - 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, - 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, - 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, - 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, - 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, - 
1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, - 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, - 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, - 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, - 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 - ]).astype(np.float32) - color_list = color_list.reshape((-1, 3)) * 255 - if not rgb: - color_list = color_list[:, ::-1] - return color_list diff --git a/PaddleCV/rcnn/config.py b/PaddleCV/rcnn/config.py deleted file mode 100644 index 2a8ebdf7c1871f5863facd6e2138993ed4d7ffd1..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/config.py +++ /dev/null @@ -1,238 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from edict import AttrDict -import six -import numpy as np - -_C = AttrDict() -cfg = _C - -# -# Training options -# -_C.TRAIN = AttrDict() - -# scales an image's shortest side -_C.TRAIN.scales = [800] - -# max size of longest side -_C.TRAIN.max_size = 1333 - -# images per GPU in minibatch -_C.TRAIN.im_per_batch = 1 - -# roi minibatch size per image -_C.TRAIN.batch_size_per_im = 512 - -# target fraction of foreground roi minibatch -_C.TRAIN.fg_fractrion = 0.25 - -# overlap threshold for a foreground roi -_C.TRAIN.fg_thresh = 0.5 - -# overlap threshold for a background roi -_C.TRAIN.bg_thresh_hi = 0.5 -_C.TRAIN.bg_thresh_lo = 0.0 - -# If False, only resize image and not pad, image shape is different between -# GPUs in one mini-batch. If True, image shape is the same in one mini-batch. -_C.TRAIN.padding_minibatch = False - -# Snapshot period -_C.TRAIN.snapshot_iter = 10000 - -# number of RPN proposals to keep before NMS -_C.TRAIN.rpn_pre_nms_top_n = 12000 - -# number of RPN proposals to keep after NMS -_C.TRAIN.rpn_post_nms_top_n = 2000 - -# NMS threshold used on RPN proposals -_C.TRAIN.rpn_nms_thresh = 0.7 - -# min size in RPN proposals -_C.TRAIN.rpn_min_size = 0.0 - -# eta for adaptive NMS in RPN -_C.TRAIN.rpn_eta = 1.0 - -# number of RPN examples per image -_C.TRAIN.rpn_batch_size_per_im = 256 - -# remove anchors out of the image -_C.TRAIN.rpn_straddle_thresh = 0. 
- -# target fraction of foreground examples pre RPN minibatch -_C.TRAIN.rpn_fg_fraction = 0.5 - -# min overlap between anchor and gt box to be a positive examples -_C.TRAIN.rpn_positive_overlap = 0.7 - -# max overlap between anchor and gt box to be a negative examples -_C.TRAIN.rpn_negative_overlap = 0.3 - -# stopgrad at a specified stage -_C.TRAIN.freeze_at = 2 - -# min area of ground truth box -_C.TRAIN.gt_min_area = -1 - -# Use horizontally-flipped images during training? -_C.TRAIN.use_flipped = True - -# -# Inference options -# -_C.TEST = AttrDict() - -# scales an image's shortest side -_C.TEST.scales = [800] - -# max size of longest side -_C.TEST.max_size = 1333 - -# eta for adaptive NMS in RPN -_C.TEST.rpn_eta = 1.0 - -# min score threshold to infer -_C.TEST.score_thresh = 0.05 - -# overlap threshold used for NMS -_C.TEST.nms_thresh = 0.5 - -# number of RPN proposals to keep before NMS -_C.TEST.rpn_pre_nms_top_n = 6000 - -# number of RPN proposals to keep after NMS -_C.TEST.rpn_post_nms_top_n = 1000 - -# min size in RPN proposals -_C.TEST.rpn_min_size = 0.0 - -# max number of detections -_C.TEST.detections_per_im = 100 - -# NMS threshold used on RPN proposals -_C.TEST.rpn_nms_thresh = 0.7 - -# -# Model options -# - -# Whether use mask rcnn head -_C.MASK_ON = True - -# weight for bbox regression targets -_C.bbox_reg_weights = [0.1, 0.1, 0.2, 0.2] - -# RPN anchor sizes -_C.anchor_sizes = [32, 64, 128, 256, 512] - -# RPN anchor ratio -_C.aspect_ratio = [0.5, 1, 2] - -# variance of anchors -_C.variances = [1., 1., 1., 1.] - -# stride of feature map -_C.rpn_stride = [16.0, 16.0] - -# Use roi pool or roi align, 'RoIPool' or 'RoIAlign' -_C.roi_func = 'RoIAlign' - -# sampling ratio for roi align -_C.sampling_ratio = 0 - -# pooled width and pooled height -_C.roi_resolution = 14 - -# spatial scale -_C.spatial_scale = 1. / 16. 
- -# resolution to represent mask labels -_C.resolution = 14 - -# Number of channels in the mask head -_C.dim_reduced = 256 - -# Threshold for converting soft masks to hard masks -_C.mrcnn_thresh_binarize = 0.5 - -# -# SOLVER options -# - -# derived learning rate the to get the final learning rate. -_C.learning_rate = 0.01 - -# maximum number of iterations, 1x: 180000, 2x:360000 -_C.max_iter = 180000 -#_C.max_iter = 360000 - -# warm up to learning rate -_C.warm_up_iter = 500 -_C.warm_up_factor = 1. / 3. - -# lr steps_with_decay, 1x: [120000, 160000], 2x: [240000, 320000] -_C.lr_steps = [120000, 160000] -#_C.lr_steps = [240000, 320000] -_C.lr_gamma = 0.1 - -# L2 regularization hyperparameter -_C.weight_decay = 0.0001 - -# momentum with SGD -_C.momentum = 0.9 - -# -# ENV options -# - -# support both CPU and GPU -_C.use_gpu = True - -# Whether use parallel -_C.parallel = True - -# Class number -_C.class_num = 81 - -# support pyreader -_C.use_pyreader = True - -# pixel mean values -_C.pixel_means = [102.9801, 115.9465, 122.7717] - -# clip box to prevent overflowing -_C.bbox_clip = np.log(1000. / 16.) - - -def merge_cfg_from_args(args, mode): - """Merge config keys, values in args into the global config.""" - if mode == 'train': - sub_d = _C.TRAIN - else: - sub_d = _C.TEST - for k, v in sorted(six.iteritems(vars(args))): - d = _C - try: - value = eval(v) - except: - value = v - if k in sub_d: - sub_d[k] = value - else: - d[k] = value diff --git a/PaddleCV/rcnn/data_utils.py b/PaddleCV/rcnn/data_utils.py deleted file mode 100644 index 86be7f1d49762c7e57180304edfde0810374449d..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/data_utils.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# Detectron -# Copyright (c) 2017-present, Facebook, Inc. -# Licensed under the Apache License, Version 2.0; -# Written by Ross Girshick -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import cv2 -import numpy as np -from config import cfg -import os - - -class DatasetPath(object): - def __init__(self, mode): - self.mode = mode - mode_name = 'train' if mode == 'train' else 'val' - if cfg.dataset != 'coco2014' and cfg.dataset != 'coco2017': - raise NotImplementedError('Dataset {} not supported'.format( - cfg.dataset)) - self.sub_name = mode_name + cfg.dataset[-4:] - - def get_data_dir(self): - return os.path.join(cfg.data_dir, self.sub_name) - - def get_file_list(self): - sfile_list = 'annotations/instances_' + self.sub_name + '.json' - return os.path.join(cfg.data_dir, sfile_list) - - -def get_image_blob(roidb, mode): - """Builds an input blob from the images in the roidb at the specified - scales. 
- """ - if mode == 'train': - scales = cfg.TRAIN.scales - scale_ind = np.random.randint(0, high=len(scales)) - target_size = scales[scale_ind] - max_size = cfg.TRAIN.max_size - else: - target_size = cfg.TEST.scales[0] - max_size = cfg.TEST.max_size - im = cv2.imread(roidb['image']) - try: - assert im is not None - except AssertionError as e: - print('Failed to read image \'{}\''.format(roidb['image'])) - os._exit(0) - if roidb['flipped']: - im = im[:, ::-1, :] - im, im_scale = prep_im_for_blob(im, cfg.pixel_means, target_size, max_size) - - return im, im_scale - - -def prep_im_for_blob(im, pixel_means, target_size, max_size): - """Prepare an image for use as a network input blob. Specially: - - Subtract per-channel pixel mean - - Convert to float32 - - Rescale to each of the specified target size (capped at max_size) - Returns a list of transformed images, one for each target size. Also returns - the scale factors that were used to compute each returned image. - """ - im = im.astype(np.float32, copy=False) - im -= pixel_means - - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - # Prevent the biggest axis from being more than max_size - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - im = cv2.resize( - im, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - im_height, im_width, channel = im.shape - channel_swap = (2, 0, 1) #(batch, channel, height, width) - im = im.transpose(channel_swap) - return im, im_scale diff --git a/PaddleCV/rcnn/dataset/coco/download.py b/PaddleCV/rcnn/dataset/coco/download.py deleted file mode 100644 index 9df49bef6eab9d615e61e3cd429dcfdbeb5708ce..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/dataset/coco/download.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import os.path as osp -import sys -import zipfile -import logging - -from paddle.dataset.common import download - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -DATASETS = { - 'coco': [ - # coco2017 - ('http://images.cocodataset.org/zips/train2017.zip', - 'cced6f7f71b7629ddf16f17bbcfab6b2', ), - ('http://images.cocodataset.org/zips/val2017.zip', - '442b8da7639aecaf257c1dceb8ba8c80', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', - 'f4bbac642086de4f52a3fdda2de5fa2c', ), - # coco2014 - ('http://images.cocodataset.org/zips/train2014.zip', - '0da8c0bd3d6becc4dcb32757491aca88', ), - ('http://images.cocodataset.org/zips/val2014.zip', - 'a3d79f5ed8d289b7a7554ce06a5782b3', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', - '0a379cfc70b0e71301e0f377548639bd', ), - ], -} - - -def download_decompress_file(data_dir, url, md5): - logger.info("Downloading from {}".format(url)) - zip_file = download(url, data_dir, md5) - logger.info("Decompressing {}".format(zip_file)) - with zipfile.ZipFile(zip_file) as zf: - zf.extractall(path=data_dir) - os.remove(zip_file) - - -if __name__ == "__main__": - data_dir = osp.split(osp.realpath(sys.argv[0]))[0] - for name, infos in DATASETS.items(): - for info in infos: - download_decompress_file(data_dir, info[0], info[1]) - logger.info("Download dataset {} 
finished.".format(name)) diff --git a/PaddleCV/rcnn/dist_utils.py b/PaddleCV/rcnn/dist_utils.py deleted file mode 100644 index 49df856d950a689951a6e070b1c1810be196f758..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/dist_utils.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import paddle.fluid as fluid - - -def nccl2_prepare(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - - -def prepare_for_multi_process(exe, build_strategy, train_prog): - # prepare for multi-process - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: return - print("PADDLE_TRAINERS_NUM", num_trainers) - print("PADDLE_TRAINER_ID", trainer_id) - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - # NOTE(zcd): use multi processes to train the model, - # and each process use one GPU card. 
- startup_prog = fluid.Program() - nccl2_prepare(trainer_id, startup_prog, train_prog) - # the startup_prog are run two times, but it doesn't matter. - exe.run(startup_prog) diff --git a/PaddleCV/rcnn/edict.py b/PaddleCV/rcnn/edict.py deleted file mode 100644 index 415cc6f7d6514a2fa79fb2a75bb23d8b8fd2fe72..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/edict.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - - def __getattr__(self, name): - if name in self.__dict__: - return self.__dict__[name] - elif name in self: - return self[name] - else: - raise AttributeError(name) - - def __setattr__(self, name, value): - if name in self.__dict__: - self.__dict__[name] = value - else: - self[name] = value diff --git a/PaddleCV/rcnn/eval_coco_map.py b/PaddleCV/rcnn/eval_coco_map.py deleted file mode 100644 index b0439fa053d869abc3d4da9d3622013308a522a5..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/eval_coco_map.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import io -import six -import time -import numpy as np -from eval_helper import * -import paddle -import paddle.fluid as fluid -import reader -from utility import print_arguments, parse_args, check_gpu -import models.model_builder as model_builder -import models.resnet as resnet -import json -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval, Params -from config import cfg -from data_utils import DatasetPath - - -def eval(): - - data_path = DatasetPath('val') - test_list = data_path.get_file_list() - - image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] - class_nums = cfg.class_num - devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" - devices_num = len(devices.split(",")) - total_batch_size = devices_num * cfg.TRAIN.im_per_batch - cocoGt = COCO(test_list) - num_id_to_cat_id_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())} - category_ids = cocoGt.getCatIds() - label_list = { - item['id']: item['name'] - for item in cocoGt.loadCats(category_ids) - } - label_list[0] = ['background'] - - model = model_builder.RCNN( - add_conv_body_func=resnet.add_ResNet50_conv4_body, - add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, - use_pyreader=False, - mode='val') - model.build_model(image_shape) - pred_boxes = model.eval_bbox_out() - if cfg.MASK_ON: - masks = 
model.eval_mask_out() - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - # yapf: disable - if cfg.pretrained_model: - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) - fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) - - # yapf: enable - test_reader = reader.test(total_batch_size) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - - dts_res = [] - segms_res = [] - if cfg.MASK_ON: - fetch_list = [pred_boxes, masks] - else: - fetch_list = [pred_boxes] - eval_start = time.time() - for batch_id, batch_data in enumerate(test_reader()): - start = time.time() - im_info = [] - for data in batch_data: - im_info.append(data[1]) - results = exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(batch_data), - return_numpy=False) - - pred_boxes_v = results[0] - if cfg.MASK_ON: - masks_v = results[1] - - new_lod = pred_boxes_v.lod() - nmsed_out = pred_boxes_v - - dts_res += get_dt_res(total_batch_size, new_lod[0], nmsed_out, - batch_data, num_id_to_cat_id_map) - - if cfg.MASK_ON and np.array(masks_v).shape != (1, 1): - segms_out = segm_results(nmsed_out, masks_v, im_info) - segms_res += get_segms_res(total_batch_size, new_lod[0], segms_out, - batch_data, num_id_to_cat_id_map) - end = time.time() - print('batch id: {}, time: {}'.format(batch_id, end - start)) - eval_end = time.time() - total_time = eval_end - eval_start - print('average time of eval is: {}'.format(total_time / (batch_id + 1))) - assert len(dts_res) > 0, "The number of valid bbox detected is zero.\n \ - Please use reasonable model and check input data." - - if cfg.MASK_ON: - assert len( - segms_res) > 0, "The number of valid mask detected is zero.\n \ - Please use reasonable model and check input data." 
- - with io.open("detection_bbox_result.json", 'w') as outfile: - encode_func = unicode if six.PY2 else str - outfile.write(encode_func(json.dumps(dts_res))) - print("start evaluate bbox using coco api") - cocoDt = cocoGt.loadRes("detection_bbox_result.json") - cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - if cfg.MASK_ON: - with io.open("detection_segms_result.json", 'w') as outfile: - encode_func = unicode if six.PY2 else str - outfile.write(encode_func(json.dumps(segms_res))) - print("start evaluate mask using coco api") - cocoDt = cocoGt.loadRes("detection_segms_result.json") - cocoEval = COCOeval(cocoGt, cocoDt, 'segm') - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - eval() diff --git a/PaddleCV/rcnn/eval_helper.py b/PaddleCV/rcnn/eval_helper.py deleted file mode 100644 index dba67f6bbed2c87b5794dcc9c01a36205381e0d1..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/eval_helper.py +++ /dev/null @@ -1,386 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import os -import numpy as np -import paddle.fluid as fluid -import math -import box_utils -from PIL import Image -from PIL import ImageDraw -from PIL import ImageFont -from config import cfg -import pycocotools.mask as mask_util -import six -from colormap import colormap -import cv2 - - -def box_decoder(deltas, boxes, weights): - if boxes.shape[0] == 0: - return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) - - boxes = boxes.astype(deltas.dtype, copy=False) - - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - wx, wy, ww, wh = weights - dx = deltas[:, 0::4] * wx - dy = deltas[:, 1::4] * wy - dw = deltas[:, 2::4] * ww - dh = deltas[:, 3::4] * wh - - # Prevent sending too large values into np.exp() - dw = np.minimum(dw, cfg.bbox_clip) - dh = np.minimum(dh, cfg.bbox_clip) - - pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = np.exp(dw) * widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - - pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 - # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 - - return pred_boxes - - -def clip_tiled_boxes(boxes, im_shape): - """Clip boxes to image boundaries. 
im_shape is [height, width] and boxes - has shape (N, 4 * num_tiled_boxes).""" - assert boxes.shape[1] % 4 == 0, \ - 'boxes.shape[1] is {:d}, but must be divisible by 4.'.format( - boxes.shape[1] - ) - # x1 >= 0 - boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) - # y1 >= 0 - boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) - return boxes - - -def get_nmsed_box(rpn_rois, confs, locs, class_nums, im_info): - lod = rpn_rois.lod()[0] - rpn_rois_v = np.array(rpn_rois) - variance_v = np.array(cfg.bbox_reg_weights) - confs_v = np.array(confs) - locs_v = np.array(locs) - im_results = [[] for _ in range(len(lod) - 1)] - new_lod = [0] - for i in range(len(lod) - 1): - start = lod[i] - end = lod[i + 1] - if start == end: - continue - locs_n = locs_v[start:end, :] - rois_n = rpn_rois_v[start:end, :] - rois_n = rois_n / im_info[i][2] - rois_n = box_decoder(locs_n, rois_n, variance_v) - rois_n = clip_tiled_boxes(rois_n, im_info[i][:2] / im_info[i][2]) - - cls_boxes = [[] for _ in range(class_nums)] - scores_n = confs_v[start:end, :] - for j in range(1, class_nums): - inds = np.where(scores_n[:, j] > cfg.TEST.score_thresh)[0] - scores_j = scores_n[inds, j] - rois_j = rois_n[inds, j * 4:(j + 1) * 4] - dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype( - np.float32, copy=False) - keep = box_utils.nms(dets_j, cfg.TEST.nms_thresh) - nms_dets = dets_j[keep, :] - #add labels - label = np.array([j for _ in range(len(keep))]) - nms_dets = np.hstack((nms_dets, label[:, np.newaxis])).astype( - np.float32, copy=False) - cls_boxes[j] = nms_dets - # Limit to max_per_image detections **over all classes** - image_scores = np.hstack( - [cls_boxes[j][:, 1] for j in range(1, class_nums)]) - if len(image_scores) > 
cfg.TEST.detections_per_im: - image_thresh = np.sort(image_scores)[-cfg.TEST.detections_per_im] - for j in range(1, class_nums): - keep = np.where(cls_boxes[j][:, 1] >= image_thresh)[0] - cls_boxes[j] = cls_boxes[j][keep, :] - - im_results_n = np.vstack([cls_boxes[j] for j in range(1, class_nums)]) - im_results[i] = im_results_n - new_lod.append(len(im_results_n) + new_lod[-1]) - boxes = im_results_n[:, 2:] - scores = im_results_n[:, 1] - labels = im_results_n[:, 0] - im_results = np.vstack([im_results[k] for k in range(len(lod) - 1)]) - return new_lod, im_results - - -def get_dt_res(batch_size, lod, nmsed_out, data, num_id_to_cat_id_map): - dts_res = [] - nmsed_out_v = np.array(nmsed_out) - if nmsed_out_v.shape == ( - 1, - 1, ): - return dts_res - assert (len(lod) == batch_size + 1), \ - "Error Lod Tensor offset dimension. Lod({}) vs. batch_size({})"\ - .format(len(lod), batch_size) - k = 0 - for i in range(batch_size): - dt_num_this_img = lod[i + 1] - lod[i] - image_id = int(data[i][-1]) - image_width = int(data[i][1][1]) - image_height = int(data[i][1][2]) - for j in range(dt_num_this_img): - dt = nmsed_out_v[k] - k = k + 1 - num_id, score, xmin, ymin, xmax, ymax = dt.tolist() - category_id = num_id_to_cat_id_map[num_id] - w = xmax - xmin + 1 - h = ymax - ymin + 1 - bbox = [xmin, ymin, w, h] - dt_res = { - 'image_id': image_id, - 'category_id': category_id, - 'bbox': bbox, - 'score': score - } - dts_res.append(dt_res) - return dts_res - - -def get_segms_res(batch_size, lod, segms_out, data, num_id_to_cat_id_map): - segms_res = [] - segms_out_v = np.array(segms_out) - k = 0 - for i in range(batch_size): - dt_num_this_img = lod[i + 1] - lod[i] - image_id = int(data[i][-1]) - for j in range(dt_num_this_img): - dt = segms_out_v[k] - k = k + 1 - segm, num_id, score = dt.tolist() - cat_id = num_id_to_cat_id_map[num_id] - if six.PY3: - if 'counts' in segm: - segm['counts'] = segm['counts'].decode("utf8") - segm_res = { - 'image_id': image_id, - 'category_id': cat_id, - 
'segmentation': segm, - 'score': score - } - segms_res.append(segm_res) - return segms_res - - -def draw_bounding_box_on_image(image_path, - nms_out, - draw_threshold, - labels_map, - image=None): - if image is None: - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - im_width, im_height = image.size - - for dt in np.array(nms_out): - num_id, score, xmin, ymin, xmax, ymax = dt.tolist() - if score < draw_threshold: - continue - draw.line( - [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), - (xmin, ymin)], - width=2, - fill='red') - if image.mode == 'RGB': - draw.text((xmin, ymin), labels_map[num_id], (255, 255, 0)) - image_name = image_path.split('/')[-1] - print("image with bbox drawed saved as {}".format(image_name)) - image.save(image_name) - - -def draw_mask_on_image(image_path, segms_out, draw_threshold, alpha=0.7): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - im_width, im_height = image.size - mask_color_id = 0 - w_ratio = .4 - image = np.array(image).astype('float32') - for dt in np.array(segms_out): - segm, num_id, score = dt.tolist() - if score < draw_threshold: - continue - mask = mask_util.decode(segm) * 255 - color_list = colormap(rgb=True) - color_mask = color_list[mask_color_id % len(color_list), 0:3] - mask_color_id += 1 - for c in range(3): - color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255 - idx = np.nonzero(mask) - image[idx[0], idx[1], :] *= 1.0 - alpha - image[idx[0], idx[1], :] += alpha * color_mask - image = Image.fromarray(image.astype('uint8')) - return image - - -def segm_results(im_results, masks, im_info): - im_results = np.array(im_results) - class_num = cfg.class_num - M = cfg.resolution - scale = (M + 2.0) / M - lod = masks.lod()[0] - masks_v = np.array(masks) - boxes = im_results[:, 2:] - labels = im_results[:, 0] - segms_results = [[] for _ in range(len(lod) - 1)] - sum = 0 - for i in range(len(lod) - 1): - im_results_n = im_results[lod[i]:lod[i + 1]] - cls_segms = [] - masks_n = 
masks_v[lod[i]:lod[i + 1]] - boxes_n = boxes[lod[i]:lod[i + 1]] - labels_n = labels[lod[i]:lod[i + 1]] - im_h = int(round(im_info[i][0] / im_info[i][2])) - im_w = int(round(im_info[i][1] / im_info[i][2])) - boxes_n = box_utils.expand_boxes(boxes_n, scale) - boxes_n = boxes_n.astype(np.int32) - padded_mask = np.zeros((M + 2, M + 2), dtype=np.float32) - for j in range(len(im_results_n)): - class_id = int(labels_n[j]) - padded_mask[1:-1, 1:-1] = masks_n[j, class_id, :, :] - - ref_box = boxes_n[j, :] - w = ref_box[2] - ref_box[0] + 1 - h = ref_box[3] - ref_box[1] + 1 - w = np.maximum(w, 1) - h = np.maximum(h, 1) - - mask = cv2.resize(padded_mask, (w, h)) - mask = np.array(mask > cfg.mrcnn_thresh_binarize, dtype=np.uint8) - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - - x_0 = max(ref_box[0], 0) - x_1 = min(ref_box[2] + 1, im_w) - y_0 = max(ref_box[1], 0) - y_1 = min(ref_box[3] + 1, im_h) - im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[ - 1]), (x_0 - ref_box[0]):(x_1 - ref_box[0])] - sum += im_mask.sum() - rle = mask_util.encode( - np.array( - im_mask[:, :, np.newaxis], order='F'))[0] - cls_segms.append(rle) - segms_results[i] = np.array(cls_segms)[:, np.newaxis] - segms_results = np.vstack([segms_results[k] for k in range(len(lod) - 1)]) - im_results = np.hstack([segms_results, im_results]) - return im_results[:, :3] - - -def coco17_labels(): - labels_map = { - 0: 'background', - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 12: 'stop sign', - 13: 'parking meter', - 14: 'bench', - 15: 'bird', - 16: 'cat', - 17: 'dog', - 18: 'horse', - 19: 'sheep', - 20: 'cow', - 21: 'elephant', - 22: 'bear', - 23: 'zebra', - 24: 'giraffe', - 25: 'backpack', - 26: 'umbrella', - 27: 'handbag', - 28: 'tie', - 29: 'suitcase', - 30: 'frisbee', - 31: 'skis', - 32: 'snowboard', - 33: 'sports ball', - 34: 'kite', - 35: 'baseball bat', - 
36: 'baseball glove', - 37: 'skateboard', - 38: 'surfboard', - 39: 'tennis racket', - 40: 'bottle', - 41: 'wine glass', - 42: 'cup', - 43: 'fork', - 44: 'knife', - 45: 'spoon', - 46: 'bowl', - 47: 'banana', - 48: 'apple', - 49: 'sandwich', - 50: 'orange', - 51: 'broccoli', - 52: 'carrot', - 53: 'hot dog', - 54: 'pizza', - 55: 'donut', - 56: 'cake', - 57: 'chair', - 58: 'couch', - 59: 'potted plant', - 60: 'bed', - 61: 'dining table', - 62: 'toilet', - 63: 'tv', - 64: 'laptop', - 65: 'mouse', - 66: 'remote', - 67: 'keyboard', - 68: 'cell phone', - 69: 'microwave', - 70: 'oven', - 71: 'toaster', - 72: 'sink', - 73: 'refrigerator', - 74: 'book', - 75: 'clock', - 76: 'vase', - 77: 'scissors', - 78: 'teddy bear', - 79: 'hair drier', - 80: 'toothbrush' - } - return labels_map diff --git a/PaddleCV/rcnn/image/000000000139.jpg b/PaddleCV/rcnn/image/000000000139.jpg deleted file mode 100644 index 3c83a2cc4a9a2f13534d81f0c4ede78ae32c58cb..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000000139.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/image/000000000139_mask.jpg b/PaddleCV/rcnn/image/000000000139_mask.jpg deleted file mode 100644 index 47dfa9a435bf81c8585e8100413cfc0d6719754c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000000139_mask.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/image/000000127517.jpg b/PaddleCV/rcnn/image/000000127517.jpg deleted file mode 100644 index 23d30251a5e386137b5881f4af48072abffad8dd..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000127517.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/image/000000127517_mask.jpg b/PaddleCV/rcnn/image/000000127517_mask.jpg deleted file mode 100644 index c0284591deadf6010bf780acf16124231c42d677..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000127517_mask.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/image/000000203864.jpg 
b/PaddleCV/rcnn/image/000000203864.jpg deleted file mode 100644 index f16ce4e05004404ff7353a8096318fd121e835a1..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000203864.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/image/000000515077.jpg b/PaddleCV/rcnn/image/000000515077.jpg deleted file mode 100644 index 61df889539b72f9b0a0b36c5731ff660a0955c46..0000000000000000000000000000000000000000 Binary files a/PaddleCV/rcnn/image/000000515077.jpg and /dev/null differ diff --git a/PaddleCV/rcnn/infer.py b/PaddleCV/rcnn/infer.py deleted file mode 100644 index d9d7ec6a05213a141057e33e1df02baa42b70d98..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/infer.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import time -import numpy as np -from eval_helper import * -import paddle -import paddle.fluid as fluid -import reader -from utility import print_arguments, parse_args, check_gpu -import models.model_builder as model_builder -import models.resnet as resnet -from config import cfg -from data_utils import DatasetPath - - -def infer(): - - try: - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval, Params - - data_path = DatasetPath('val') - test_list = data_path.get_file_list() - coco_api = COCO(test_list) - cid = coco_api.getCatIds() - cat_id_to_num_id_map = { - v: i + 1 - for i, v in enumerate(coco_api.getCatIds()) - } - category_ids = coco_api.getCatIds() - labels_map = { - cat_id_to_num_id_map[item['id']]: item['name'] - for item in coco_api.loadCats(category_ids) - } - labels_map[0] = 'background' - except: - print("The COCO dataset or COCO API is not exist, use the default " - "mapping of class index and real category name on COCO17.") - assert cfg.dataset == 'coco2017' - labels_map = coco17_labels() - - image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size] - class_nums = cfg.class_num - - model = model_builder.RCNN( - add_conv_body_func=resnet.add_ResNet50_conv4_body, - add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, - use_pyreader=False, - mode='infer') - model.build_model(image_shape) - pred_boxes = model.eval_bbox_out() - if cfg.MASK_ON: - masks = model.eval_mask_out() - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - # yapf: disable - if not os.path.exists(cfg.pretrained_model): - raise ValueError("Model path [%s] does not exist." 
% (cfg.pretrained_model)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) - fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) - # yapf: enable - infer_reader = reader.infer(cfg.image_path) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - - dts_res = [] - segms_res = [] - if cfg.MASK_ON: - fetch_list = [pred_boxes, masks] - else: - fetch_list = [pred_boxes] - data = next(infer_reader()) - im_info = [data[0][1]] - result = exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data), - return_numpy=False) - pred_boxes_v = result[0] - if cfg.MASK_ON: - masks_v = result[1] - new_lod = pred_boxes_v.lod() - nmsed_out = pred_boxes_v - image = None - if cfg.MASK_ON: - segms_out = segm_results(nmsed_out, masks_v, im_info) - image = draw_mask_on_image(cfg.image_path, segms_out, - cfg.draw_threshold) - - draw_bounding_box_on_image(cfg.image_path, nmsed_out, cfg.draw_threshold, - labels_map, image) - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - infer() diff --git a/PaddleCV/rcnn/learning_rate.py b/PaddleCV/rcnn/learning_rate.py deleted file mode 100644 index a4c9dfb6949054c86c8f307b0bfaa3eea61ee51c..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/learning_rate.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler -from paddle.fluid.layers import control_flow - - -def exponential_with_warmup_decay(learning_rate, boundaries, values, - warmup_iter, warmup_factor): - global_step = lr_scheduler._decay_step_counter() - - lr = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - - warmup_iter_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(warmup_iter), force_cpu=True) - - with control_flow.Switch() as switch: - with switch.case(global_step < warmup_iter_var): - alpha = global_step / warmup_iter_var - factor = warmup_factor * (1 - alpha) + alpha - decayed_lr = learning_rate * factor - fluid.layers.assign(decayed_lr, lr) - - for i in range(len(boundaries)): - boundary_val = fluid.layers.fill_constant( - shape=[1], - dtype='float32', - value=float(boundaries[i]), - force_cpu=True) - value_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(values[i])) - with switch.case(global_step < boundary_val): - fluid.layers.assign(value_var, lr) - - last_value_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(values[len(values) - 1])) - with switch.default(): - fluid.layers.assign(last_value_var, lr) - - return lr diff --git a/PaddleCV/rcnn/models/__init__.py b/PaddleCV/rcnn/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/rcnn/models/model_builder.py b/PaddleCV/rcnn/models/model_builder.py deleted file mode 100644 index d46ae80d484703ee106f2f52a861718a83df1b7b..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/models/model_builder.py +++ /dev/null @@ -1,441 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Constant -from paddle.fluid.initializer import Normal -from paddle.fluid.initializer import MSRA -from paddle.fluid.regularizer import L2Decay -from config import cfg - - -class RCNN(object): - def __init__(self, - add_conv_body_func=None, - add_roi_box_head_func=None, - mode='train', - use_pyreader=True, - use_random=True): - self.add_conv_body_func = add_conv_body_func - self.add_roi_box_head_func = add_roi_box_head_func - self.mode = mode - self.use_pyreader = use_pyreader - self.use_random = use_random - - def build_model(self, image_shape): - self.build_input(image_shape) - body_conv = self.add_conv_body_func(self.image) - # RPN - self.rpn_heads(body_conv) - # Fast RCNN - self.fast_rcnn_heads(body_conv) - if self.mode != 'train': - self.eval_bbox() - # Mask RCNN - if cfg.MASK_ON: - self.mask_rcnn_heads(body_conv) - - def loss(self): - losses = [] - # Fast RCNN loss - loss_cls, loss_bbox = self.fast_rcnn_loss() - # RPN loss - rpn_cls_loss, rpn_reg_loss = self.rpn_loss() - losses = [loss_cls, loss_bbox, rpn_cls_loss, rpn_reg_loss] - rkeys = ['loss', 'loss_cls', 'loss_bbox', \ - 'loss_rpn_cls', 'loss_rpn_bbox',] - if cfg.MASK_ON: - loss_mask = self.mask_rcnn_loss() - losses = losses + [loss_mask] - rkeys = rkeys + ["loss_mask"] - loss = fluid.layers.sum(losses) - rloss = [loss] + losses - 
return rloss, rkeys - - def eval_mask_out(self): - return self.mask_fcn_logits - - def eval_bbox_out(self): - return self.pred_result - - def build_input(self, image_shape): - if self.use_pyreader: - in_shapes = [[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1], - [-1, 3], [-1, 1]] - lod_levels = [0, 1, 1, 1, 0, 0] - dtypes = [ - 'float32', 'float32', 'int32', 'int32', 'float32', 'int64' - ] - if cfg.MASK_ON: - in_shapes.append([-1, 2]) - lod_levels.append(3) - dtypes.append('float32') - self.py_reader = fluid.layers.py_reader( - capacity=64, - shapes=in_shapes, - lod_levels=lod_levels, - dtypes=dtypes, - use_double_buffer=True) - ins = fluid.layers.read_file(self.py_reader) - self.image = ins[0] - self.gt_box = ins[1] - self.gt_label = ins[2] - self.is_crowd = ins[3] - self.im_info = ins[4] - self.im_id = ins[5] - if cfg.MASK_ON: - self.gt_masks = ins[6] - else: - self.image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - self.gt_box = fluid.layers.data( - name='gt_box', shape=[4], dtype='float32', lod_level=1) - self.gt_label = fluid.layers.data( - name='gt_label', shape=[1], dtype='int32', lod_level=1) - self.is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - self.im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32') - self.im_id = fluid.layers.data( - name='im_id', shape=[1], dtype='int64') - if cfg.MASK_ON: - self.gt_masks = fluid.layers.data( - name='gt_masks', shape=[2], dtype='float32', lod_level=3) - - def feeds(self): - if self.mode == 'infer': - return [self.image, self.im_info] - if self.mode == 'val': - return [self.image, self.im_info, self.im_id] - if not cfg.MASK_ON: - return [ - self.image, self.gt_box, self.gt_label, self.is_crowd, - self.im_info, self.im_id - ] - return [ - self.image, self.gt_box, self.gt_label, self.is_crowd, self.im_info, - self.im_id, self.gt_masks - ] - - def eval_bbox(self): - self.im_scale = fluid.layers.slice( - self.im_info, [1], 
starts=[2], ends=[3]) - im_scale_lod = fluid.layers.sequence_expand(self.im_scale, - self.rpn_rois) - boxes = self.rpn_rois / im_scale_lod - cls_prob = fluid.layers.softmax(self.cls_score, use_cudnn=False) - bbox_pred_reshape = fluid.layers.reshape(self.bbox_pred, - (-1, cfg.class_num, 4)) - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - prior_box_var=cfg.bbox_reg_weights, - target_box=bbox_pred_reshape, - code_type='decode_center_size', - box_normalized=False, - axis=1) - cliped_box = fluid.layers.box_clip( - input=decoded_box, im_info=self.im_info) - self.pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=cfg.TEST.score_thresh, - nms_top_k=-1, - nms_threshold=cfg.TEST.nms_thresh, - keep_top_k=cfg.TEST.detections_per_im, - normalized=False) - - def rpn_heads(self, rpn_input): - # RPN hidden representation - dim_out = rpn_input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=rpn_input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - self.anchor, self.var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=cfg.anchor_sizes, - aspect_ratios=cfg.aspect_ratio, - variance=cfg.variances, - stride=cfg.rpn_stride) - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - 
stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal( - loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - - rpn_cls_score_prob = fluid.layers.sigmoid( - self.rpn_cls_score, name='rpn_cls_score_prob') - - param_obj = cfg.TRAIN if self.mode == 'train' else cfg.TEST - pre_nms_top_n = param_obj.rpn_pre_nms_top_n - post_nms_top_n = param_obj.rpn_post_nms_top_n - nms_thresh = param_obj.rpn_nms_thresh - min_size = param_obj.rpn_min_size - eta = param_obj.rpn_eta - self.rpn_rois, self.rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_score_prob, - bbox_deltas=self.rpn_bbox_pred, - im_info=self.im_info, - anchors=self.anchor, - variances=self.var, - pre_nms_top_n=pre_nms_top_n, - post_nms_top_n=post_nms_top_n, - nms_thresh=nms_thresh, - min_size=min_size, - eta=eta) - if self.mode == 'train': - outs = fluid.layers.generate_proposal_labels( - rpn_rois=self.rpn_rois, - gt_classes=self.gt_label, - is_crowd=self.is_crowd, - gt_boxes=self.gt_box, - im_info=self.im_info, - batch_size_per_im=cfg.TRAIN.batch_size_per_im, - fg_fraction=cfg.TRAIN.fg_fractrion, - fg_thresh=cfg.TRAIN.fg_thresh, - bg_thresh_hi=cfg.TRAIN.bg_thresh_hi, - bg_thresh_lo=cfg.TRAIN.bg_thresh_lo, - bbox_reg_weights=cfg.bbox_reg_weights, - class_nums=cfg.class_num, - use_random=self.use_random) - - self.rois = outs[0] - self.labels_int32 = outs[1] - self.bbox_targets = outs[2] - self.bbox_inside_weights = outs[3] - self.bbox_outside_weights = outs[4] - - if cfg.MASK_ON: - mask_out = fluid.layers.generate_mask_labels( - im_info=self.im_info, - gt_classes=self.gt_label, - is_crowd=self.is_crowd, - gt_segms=self.gt_masks, - rois=self.rois, - labels_int32=self.labels_int32, - num_classes=cfg.class_num, - resolution=cfg.resolution) - self.mask_rois = mask_out[0] - self.roi_has_mask_int32 = mask_out[1] - self.mask_int32 = mask_out[2] - - def 
fast_rcnn_heads(self, roi_input): - if self.mode == 'train': - pool_rois = self.rois - else: - pool_rois = self.rpn_rois - self.res5_2_sum = self.add_roi_box_head_func(roi_input, pool_rois) - rcnn_out = fluid.layers.pool2d( - self.res5_2_sum, pool_type='avg', pool_size=7, name='res5_pool') - self.cls_score = fluid.layers.fc(input=rcnn_out, - size=cfg.class_num, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', - initializer=Normal( - loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='cls_score_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.bbox_pred = fluid.layers.fc(input=rcnn_out, - size=4 * cfg.class_num, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', - initializer=Normal( - loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='bbox_pred_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - def SuffixNet(self, conv5): - mask_out = fluid.layers.conv2d_transpose( - input=conv5, - num_filters=cfg.dim_reduced, - filter_size=2, - stride=2, - act='relu', - param_attr=ParamAttr( - name='conv5_mask_w', initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name='conv5_mask_b', learning_rate=2., regularizer=L2Decay(0.))) - act_func = None - if self.mode != 'train': - act_func = 'sigmoid' - mask_fcn_logits = fluid.layers.conv2d( - input=mask_out, - num_filters=cfg.class_num, - filter_size=1, - act=act_func, - param_attr=ParamAttr( - name='mask_fcn_logits_w', initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name="mask_fcn_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - - if self.mode != 'train': - mask_fcn_logits = fluid.layers.lod_reset(mask_fcn_logits, - self.pred_result) - return mask_fcn_logits - - def mask_rcnn_heads(self, mask_input): - if self.mode == 'train': - conv5 = fluid.layers.gather(self.res5_2_sum, - self.roi_has_mask_int32) - self.mask_fcn_logits = self.SuffixNet(conv5) - else: - pred_res_shape = fluid.layers.shape(self.pred_result) - shape = 
fluid.layers.reduce_prod(pred_res_shape) - shape = fluid.layers.reshape(shape, [1, 1]) - ones = fluid.layers.fill_constant([1, 1], value=1, dtype='int32') - cond = fluid.layers.equal(x=shape, y=ones) - ie = fluid.layers.IfElse(cond) - - with ie.true_block(): - pred_res_null = ie.input(self.pred_result) - ie.output(pred_res_null) - with ie.false_block(): - pred_res = ie.input(self.pred_result) - pred_boxes = fluid.layers.slice( - pred_res, [1], starts=[2], ends=[6]) - im_scale_lod = fluid.layers.sequence_expand(self.im_scale, - pred_boxes) - mask_rois = pred_boxes * im_scale_lod - conv5 = self.add_roi_box_head_func(mask_input, mask_rois) - mask_fcn = self.SuffixNet(conv5) - ie.output(mask_fcn) - self.mask_fcn_logits = ie()[0] - - def mask_rcnn_loss(self): - mask_label = fluid.layers.cast(x=self.mask_int32, dtype='float32') - reshape_dim = cfg.class_num * cfg.resolution * cfg.resolution - mask_fcn_logits_reshape = fluid.layers.reshape(self.mask_fcn_logits, - (-1, reshape_dim)) - - loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits( - x=mask_fcn_logits_reshape, - label=mask_label, - ignore_index=-1, - normalize=True) - loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask') - return loss_mask - - def fast_rcnn_loss(self): - labels_int64 = fluid.layers.cast(x=self.labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=self.cls_score, - label=labels_int64, - numeric_stable_mode=True, ) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = fluid.layers.smooth_l1( - x=self.bbox_pred, - y=self.bbox_targets, - inside_weight=self.bbox_inside_weights, - outside_weight=self.bbox_outside_weights, - sigma=1.0) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return loss_cls, loss_bbox - - def rpn_loss(self): - rpn_cls_score_reshape = fluid.layers.transpose( - self.rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred_reshape = fluid.layers.transpose( - self.rpn_bbox_pred, perm=[0, 2, 
3, 1]) - - anchor_reshape = fluid.layers.reshape(self.anchor, shape=(-1, 4)) - var_reshape = fluid.layers.reshape(self.var, shape=(-1, 4)) - - rpn_cls_score_reshape = fluid.layers.reshape( - x=rpn_cls_score_reshape, shape=(0, -1, 1)) - rpn_bbox_pred_reshape = fluid.layers.reshape( - x=rpn_bbox_pred_reshape, shape=(0, -1, 4)) - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox_pred_reshape, - cls_logits=rpn_cls_score_reshape, - anchor_box=anchor_reshape, - anchor_var=var_reshape, - gt_boxes=self.gt_box, - is_crowd=self.is_crowd, - im_info=self.im_info, - rpn_batch_size_per_im=cfg.TRAIN.rpn_batch_size_per_im, - rpn_straddle_thresh=cfg.TRAIN.rpn_straddle_thresh, - rpn_fg_fraction=cfg.TRAIN.rpn_fg_fraction, - rpn_positive_overlap=cfg.TRAIN.rpn_positive_overlap, - rpn_negative_overlap=cfg.TRAIN.rpn_negative_overlap, - use_random=self.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return rpn_cls_loss, rpn_reg_loss diff --git a/PaddleCV/rcnn/models/resnet.py b/PaddleCV/rcnn/models/resnet.py deleted file mode 100644 index 8093470241b3297c44a2e42b5162e25cac1514be..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/models/resnet.py +++ /dev/null @@ -1,181 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Constant -from paddle.fluid.regularizer import L2Decay -from config import cfg - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu', - name=None): - conv1 = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + "_biases"), - name=name + '.conv2d.output.1') - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - - return fluid.layers.batch_norm( - input=conv1, - act=act, - name=bn_name + '.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - is_test=True) - - -def conv_affine_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False, - name=name + '.conv2d.output.1') - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - 
dtype=conv.dtype, - attr=ParamAttr( - name=bn_name + '_scale', learning_rate=0.), - default_initializer=Constant(1.)) - scale.stop_gradient = True - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=ParamAttr( - bn_name + '_offset', learning_rate=0.), - default_initializer=Constant(0.)) - bias.stop_gradient = True - - out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias) - if act == 'relu': - out = fluid.layers.relu(x=out) - return out - - -def shortcut(input, ch_out, stride, name): - ch_in = input.shape[1] # if args.data_format == 'NCHW' else input.shape[-1] - if ch_in != ch_out: - return conv_affine_layer(input, ch_out, 1, stride, 0, None, name=name) - else: - return input - - -def basicblock(input, ch_out, stride, name): - short = shortcut(input, ch_out, stride, name=name) - conv1 = conv_affine_layer(input, ch_out, 3, stride, 1, name=name) - conv2 = conv_affine_layer(conv1, ch_out, 3, 1, 1, act=None, name=name) - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu', name=name) - - -def bottleneck(input, ch_out, stride, name): - short = shortcut(input, ch_out * 4, stride, name=name + "_branch1") - conv1 = conv_affine_layer( - input, ch_out, 1, stride, 0, name=name + "_branch2a") - conv2 = conv_affine_layer(conv1, ch_out, 3, 1, 1, name=name + "_branch2b") - conv3 = conv_affine_layer( - conv2, ch_out * 4, 1, 1, 0, act=None, name=name + "_branch2c") - return fluid.layers.elementwise_add( - x=short, y=conv3, act='relu', name=name + ".add.output.5") - - -def layer_warp(block_func, input, ch_out, count, stride, name): - res_out = block_func(input, ch_out, stride, name=name + "a") - for i in range(1, count): - res_out = block_func(res_out, ch_out, 1, name=name + chr(ord("a") + i)) - return res_out - - -ResNet_cfg = { - 18: ([2, 2, 2, 1], basicblock), - 34: ([3, 4, 6, 3], basicblock), - 50: ([3, 4, 6, 3], bottleneck), - 101: ([3, 4, 23, 3], bottleneck), - 152: ([3, 8, 36, 3], bottleneck) -} - - -def 
add_ResNet50_conv4_body(body_input): - stages, block_func = ResNet_cfg[50] - stages = stages[0:3] - conv1 = conv_affine_layer( - body_input, ch_out=64, filter_size=7, stride=2, padding=3, name="conv1") - pool1 = fluid.layers.pool2d( - input=conv1, - pool_type='max', - pool_size=3, - pool_stride=2, - pool_padding=1) - res2 = layer_warp(block_func, pool1, 64, stages[0], 1, name="res2") - if cfg.TRAIN.freeze_at == 2: - res2.stop_gradient = True - res3 = layer_warp(block_func, res2, 128, stages[1], 2, name="res3") - if cfg.TRAIN.freeze_at == 3: - res3.stop_gradient = True - res4 = layer_warp(block_func, res3, 256, stages[2], 2, name="res4") - if cfg.TRAIN.freeze_at == 4: - res4.stop_gradient = True - return res4 - - -def add_ResNet_roi_conv5_head(head_input, rois): - if cfg.roi_func == 'RoIPool': - pool = fluid.layers.roi_pool( - input=head_input, - rois=rois, - pooled_height=cfg.roi_resolution, - pooled_width=cfg.roi_resolution, - spatial_scale=cfg.spatial_scale) - elif cfg.roi_func == 'RoIAlign': - pool = fluid.layers.roi_align( - input=head_input, - rois=rois, - pooled_height=cfg.roi_resolution, - pooled_width=cfg.roi_resolution, - spatial_scale=cfg.spatial_scale, - sampling_ratio=cfg.sampling_ratio) - - res5 = layer_warp(bottleneck, pool, 512, 3, 2, name="res5") - return res5 diff --git a/PaddleCV/rcnn/pretrained/download.sh b/PaddleCV/rcnn/pretrained/download.sh deleted file mode 100644 index d6a28f28551e37e2110f78669fd812d73a52778b..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/pretrained/download.sh +++ /dev/null @@ -1,8 +0,0 @@ -DIR="$( cd "$(dirname "$0")" )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://paddlemodels.bj.bcebos.com/faster_rcnn/imagenet_resnet50_fusebn.tar.gz -echo "Extracting..." 
-tar -xf imagenet_resnet50_fusebn.tar.gz diff --git a/PaddleCV/rcnn/reader.py b/PaddleCV/rcnn/reader.py deleted file mode 100644 index 7dded0ab4444e67d6251d4eb99622af85a325e8f..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/reader.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import random -import numpy as np -import xml.etree.ElementTree -import os -import time -import copy -import six -import cv2 -from collections import deque - -from roidbs import JsonDataset -import data_utils -from config import cfg -import segm_utils -num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - - -def roidb_reader(roidb, mode): - im, im_scales = data_utils.get_image_blob(roidb, mode) - im_id = roidb['id'] - im_height = np.round(roidb['height'] * im_scales) - im_width = np.round(roidb['width'] * im_scales) - im_info = np.array([im_height, im_width, im_scales], dtype=np.float32) - if mode == 'val': - return im, im_info, im_id - - gt_boxes = roidb['gt_boxes'].astype('float32') - gt_classes = roidb['gt_classes'].astype('int32') - is_crowd = roidb['is_crowd'].astype('int32') - segms = roidb['segms'] - - outs = (im, gt_boxes, gt_classes, is_crowd, im_info, im_id) - - if cfg.MASK_ON: - gt_masks = [] - valid = True - segms = roidb['segms'] - assert len(segms) == is_crowd.shape[0] - for i in range(len(roidb['segms'])): - segm, iscrowd = segms[i], is_crowd[i] - gt_segm = 
[] - if iscrowd: - gt_segm.append([[0, 0]]) - else: - for poly in segm: - if len(poly) == 0: - valid = False - break - gt_segm.append(np.array(poly).reshape(-1, 2)) - if (not valid) or len(gt_segm) == 0: - break - gt_masks.append(gt_segm) - outs = outs + (gt_masks, ) - return outs - - -def coco(mode, - batch_size=None, - total_batch_size=None, - padding_total=False, - shuffle=False, - shuffle_seed=None): - total_batch_size = total_batch_size if total_batch_size else batch_size - assert total_batch_size % batch_size == 0 - json_dataset = JsonDataset(mode) - roidbs = json_dataset.get_roidb() - - print("{} on {} with {} roidbs".format(mode, cfg.dataset, len(roidbs))) - - def padding_minibatch(batch_data): - if len(batch_data) == 1: - return batch_data - - max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0) - - padding_batch = [] - for data in batch_data: - im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros( - (im_c, max_shape[1], max_shape[2]), dtype=np.float32) - padding_im[:, :im_h, :im_w] = data[0] - padding_batch.append((padding_im, ) + data[1:]) - return padding_batch - - def reader(): - if mode == "train": - if shuffle: - if shuffle_seed is not None: - np.random.seed(shuffle_seed) - roidb_perm = deque(np.random.permutation(roidbs)) - else: - roidb_perm = deque(roidbs) - roidb_cur = 0 - count = 0 - batch_out = [] - device_num = total_batch_size / batch_size - while True: - roidb = roidb_perm[0] - roidb_cur += 1 - roidb_perm.rotate(-1) - if roidb_cur >= len(roidbs): - if shuffle: - roidb_perm = deque(np.random.permutation(roidbs)) - else: - roidb_perm = deque(roidbs) - roidb_cur = 0 - # im, gt_boxes, gt_classes, is_crowd, im_info, im_id, gt_masks - datas = roidb_reader(roidb, mode) - if datas[1].shape[0] == 0: - continue - if cfg.MASK_ON: - if len(datas[-1]) != datas[1].shape[0]: - continue - batch_out.append(datas) - if not padding_total: - if len(batch_out) == batch_size: - yield padding_minibatch(batch_out) - count += 1 - batch_out 
= [] - else: - if len(batch_out) == total_batch_size: - batch_out = padding_minibatch(batch_out) - for i in range(device_num): - sub_batch_out = [] - for j in range(batch_size): - sub_batch_out.append(batch_out[i * batch_size + - j]) - yield sub_batch_out - count += 1 - sub_batch_out = [] - batch_out = [] - iter_id = count // device_num - if iter_id >= cfg.max_iter * num_trainers: - return - elif mode == "val": - batch_out = [] - for roidb in roidbs: - im, im_info, im_id = roidb_reader(roidb, mode) - batch_out.append((im, im_info, im_id)) - if len(batch_out) == batch_size: - yield batch_out - batch_out = [] - if len(batch_out) != 0: - yield batch_out - - return reader - - -def train(batch_size, - total_batch_size=None, - padding_total=False, - shuffle=True, - shuffle_seed=None): - return coco( - 'train', - batch_size, - total_batch_size, - padding_total, - shuffle=shuffle, - shuffle_seed=shuffle_seed) - - -def test(batch_size, total_batch_size=None, padding_total=False): - return coco('val', batch_size, total_batch_size, shuffle=False) - - -def infer(file_path): - def reader(): - if not os.path.exists(file_path): - raise ValueError("Image path [%s] does not exist." % (file_path)) - im = cv2.imread(file_path) - im = im.astype(np.float32, copy=False) - im -= cfg.pixel_means - im_height, im_width, channel = im.shape - channel_swap = (2, 0, 1) #(channel, height, width) - im = im.transpose(channel_swap) - im_info = np.array([im_height, im_width, 1.0], dtype=np.float32) - yield [(im, im_info)] - - return reader diff --git a/PaddleCV/rcnn/roidbs.py b/PaddleCV/rcnn/roidbs.py deleted file mode 100644 index 581a2907c25e72af85f90317ef290c10057bc44d..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/roidbs.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# Detectron -# Copyright (c) 2017-present, Facebook, Inc. -# Licensed under the Apache License, Version 2.0; -# Written by Ross Girshick -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import logging -import numpy as np -import os -import scipy.sparse -import random -import time -import matplotlib -matplotlib.use('Agg') -from pycocotools.coco import COCO -import box_utils -import segm_utils -from config import cfg -from data_utils import DatasetPath - -logger = logging.getLogger(__name__) - - -class JsonDataset(object): - """A class representing a COCO json dataset.""" - - def __init__(self, mode): - print('Creating: {}'.format(cfg.dataset)) - self.name = cfg.dataset - self.is_train = mode == 'train' - data_path = DatasetPath(mode) - data_dir = data_path.get_data_dir() - file_list = data_path.get_file_list() - self.image_directory = data_dir - self.COCO = COCO(file_list) - # Set up dataset classes - category_ids = self.COCO.getCatIds() - categories = [c['name'] for c in self.COCO.loadCats(category_ids)] - self.category_to_id_map = dict(zip(categories, category_ids)) - self.classes = ['__background__'] + categories - self.num_classes = len(self.classes) - self.json_category_id_to_contiguous_id = { - v: i + 1 - for i, v in enumerate(self.COCO.getCatIds()) - } - self.contiguous_category_id_to_json_id = { 
- v: k - for k, v in self.json_category_id_to_contiguous_id.items() - } - - def get_roidb(self): - """Return an roidb corresponding to the json dataset. Optionally: - - include ground truth boxes in the roidb - - add proposals specified in a proposals file - - filter proposals based on a minimum side length - - filter proposals that intersect with crowd regions - """ - image_ids = self.COCO.getImgIds() - image_ids.sort() - roidb = copy.deepcopy(self.COCO.loadImgs(image_ids)) - for entry in roidb: - self._prep_roidb_entry(entry) - if self.is_train: - # Include ground-truth object annotations - start_time = time.time() - for entry in roidb: - self._add_gt_annotations(entry) - end_time = time.time() - print('_add_gt_annotations took {:.3f}s'.format(end_time - - start_time)) - if cfg.TRAIN.use_flipped: - print('Appending horizontally-flipped training examples...') - self._extend_with_flipped_entries(roidb) - print('Loaded dataset: {:s}'.format(self.name)) - print('{:d} roidb entries'.format(len(roidb))) - if self.is_train: - self._filter_for_training(roidb) - return roidb - - def _prep_roidb_entry(self, entry): - """Adds empty metadata fields to an roidb entry.""" - # Make file_name an abs path - im_path = os.path.join(self.image_directory, entry['file_name']) - #assert os.path.exists(im_path), 'Image \'{}\' not found'.format(im_path) - entry['image'] = im_path - entry['flipped'] = False - # Empty placeholders - entry['gt_boxes'] = np.empty((0, 4), dtype=np.float32) - entry['gt_classes'] = np.empty((0), dtype=np.int32) - entry['gt_id'] = np.empty((0), dtype=np.int32) - entry['is_crowd'] = np.empty((0), dtype=np.bool) - entry['segms'] = [] - # Remove unwanted fields that come from the json file (if they exist) - for k in ['date_captured', 'url', 'license', 'file_name']: - if k in entry: - del entry[k] - - def _add_gt_annotations(self, entry): - """Add ground truth annotation metadata to an roidb entry.""" - count = 0 - #for k in self.category_to_id_map: - # imgs = 
self.COCO.getImgIds(catIds=(self.category_to_id_map[k])) - # count += len(imgs) - ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None) - objs = self.COCO.loadAnns(ann_ids) - # Sanitize bboxes -- some are invalid - valid_objs = [] - valid_segms = [] - width = entry['width'] - height = entry['height'] - for obj in objs: - if isinstance(obj['segmentation'], list): - # Valid polygons have >= 3 points, so require >= 6 coordinates - obj['segmentation'] = [ - p for p in obj['segmentation'] if len(p) >= 6 - ] - if obj['area'] < cfg.TRAIN.gt_min_area: - continue - if 'ignore' in obj and obj['ignore'] == 1: - continue - # Convert form (x1, y1, w, h) to (x1, y1, x2, y2) - x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox']) - x1, y1, x2, y2 = box_utils.clip_xyxy_to_image(x1, y1, x2, y2, - height, width) - # Require non-zero seg area and more than 1x1 box size - if obj['area'] > 0 and x2 > x1 and y2 > y1: - obj['clean_bbox'] = [x1, y1, x2, y2] - valid_objs.append(obj) - valid_segms.append(obj['segmentation']) - - num_valid_objs = len(valid_objs) - - gt_boxes = np.zeros((num_valid_objs, 4), dtype=entry['gt_boxes'].dtype) - gt_id = np.zeros((num_valid_objs), dtype=np.int64) - gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype) - is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype) - for ix, obj in enumerate(valid_objs): - cls = self.json_category_id_to_contiguous_id[obj['category_id']] - gt_boxes[ix, :] = obj['clean_bbox'] - gt_classes[ix] = cls - gt_id[ix] = np.int64(obj['id']) - is_crowd[ix] = obj['iscrowd'] - - entry['gt_boxes'] = np.append(entry['gt_boxes'], gt_boxes, axis=0) - entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes) - entry['gt_id'] = np.append(entry['gt_id'], gt_id) - entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd) - entry['segms'].extend(valid_segms) - - def _extend_with_flipped_entries(self, roidb): - """Flip each entry in the given roidb and return a new roidb that is the - concatenation 
of the original roidb and the flipped entries. - "Flipping" an entry means that that image and associated metadata (e.g., - ground truth boxes and object proposals) are horizontally flipped. - """ - flipped_roidb = [] - for entry in roidb: - width = entry['width'] - gt_boxes = entry['gt_boxes'].copy() - oldx1 = gt_boxes[:, 0].copy() - oldx2 = gt_boxes[:, 2].copy() - gt_boxes[:, 0] = width - oldx2 - 1 - gt_boxes[:, 2] = width - oldx1 - 1 - assert (gt_boxes[:, 2] >= gt_boxes[:, 0]).all() - flipped_entry = {} - dont_copy = ('gt_boxes', 'flipped', 'segms') - for k, v in entry.items(): - if k not in dont_copy: - flipped_entry[k] = v - flipped_entry['gt_boxes'] = gt_boxes - flipped_entry['segms'] = segm_utils.flip_segms( - entry['segms'], entry['height'], entry['width']) - flipped_entry['flipped'] = True - flipped_roidb.append(flipped_entry) - roidb.extend(flipped_roidb) - - def _filter_for_training(self, roidb): - """Remove roidb entries that have no usable RoIs based on config settings. - """ - - def is_valid(entry): - # Valid images have: - # (1) At least one groundtruth RoI OR - # (2) At least one background RoI - gt_boxes = entry['gt_boxes'] - # image is only valid if such boxes exist - valid = len(gt_boxes) > 0 - return valid - - num = len(roidb) - filtered_roidb = [entry for entry in roidb if is_valid(entry)] - num_after = len(filtered_roidb) - print('Filtered {} roidb entries: {} -> {}'.format(num - num_after, num, - num_after)) diff --git a/PaddleCV/rcnn/scripts/eval.sh b/PaddleCV/rcnn/scripts/eval.sh deleted file mode 100644 index 922380acf52e594931506e791990319d152d9260..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/scripts/eval.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -export CUDA_VISIBLE_DEVICES=0 - -model=$1 # faster_rcnn, mask_rcnn -if [ "$model" = "faster_rcnn" ]; then - mask_on="--MASK_ON False" -elif [ "$model" = "mask_rcnn" ]; then - mask_on="--MASK_ON True" -else - echo "Invalid model provided. 
Please use one of {faster_rcnn, mask_rcnn}" - exit 1 -fi - -python -u ../eval_coco_map.py \ - $mask_on \ - --pretrained_model=../output/model_iter179999 \ - --data_dir=../dataset/coco/ \ diff --git a/PaddleCV/rcnn/scripts/infer.sh b/PaddleCV/rcnn/scripts/infer.sh deleted file mode 100644 index 6f0e02730b9db07568c31a280825f75e321eab64..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/scripts/infer.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -export CUDA_VISIBLE_DEVICES=0 - -model=$1 # faster_rcnn, mask_rcnn -if [ "$model" = "faster_rcnn" ]; then - mask_on="--MASK_ON False" -elif [ "$model" = "mask_rcnn" ]; then - mask_on="--MASK_ON True" -else - echo "Invalid model provided. Please use one of {faster_rcnn, mask_rcnn}" - exit 1 -fi - -python -u ../infer.py \ - $mask_on \ - --pretrained_model=../output/model_iter179999 \ - --image_path=../dataset/coco/val2017/ \ - --image_name=000000000139.jpg \ - --draw_threshold=0.6 diff --git a/PaddleCV/rcnn/scripts/train.sh b/PaddleCV/rcnn/scripts/train.sh deleted file mode 100755 index 83c67e6c39121c0fecec5cd7c037d14ab53c619d..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/scripts/train.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 - -model=$1 # faster_rcnn, mask_rcnn -if [ "$model" = "faster_rcnn" ]; then - mask_on="--MASK_ON False" -elif [ "$model" = "mask_rcnn" ]; then - mask_on="--MASK_ON True" -else - echo "Invalid model provided. Please use one of {faster_rcnn, mask_rcnn}" - exit 1 -fi - -python -u ../train.py \ - $mask_on \ - --model_save_dir=../output/ \ - --pretrained_model=../imagenet_resnet50_fusebn/ \ - --data_dir=../dataset/coco/ \ - diff --git a/PaddleCV/rcnn/segm_utils.py b/PaddleCV/rcnn/segm_utils.py deleted file mode 100644 index 17b72228bc4284dc5936d4a3fda5c2422c4aa958..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/segm_utils.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://w_idxw.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# Detectron -# Copyright (c) 2017-present, Facebook, Inc. -# Licensed under the Apache License, Version 2.0; -# Written by Ross Girshick -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import pycocotools.mask as mask_util -import cv2 - - -def is_poly(segm): - """Determine if segm is a polygon. 
Valid segm expected (polygon or RLE).""" - assert isinstance(segm, (list, dict)), \ - 'Invalid segm type: {}'.format(type(segm)) - return isinstance(segm, list) - - -def segms_to_rle(segms, height, width): - rle = segms - if isinstance(segms, list): - # polygon -- a single object might consist of multiple parts - # we merge all parts into one mask rle code - rles = mask_util.frPyObjects(segms, height, width) - rle = mask_util.merge(rles) - elif isinstance(segms['counts'], list): - # uncompressed RLE - rle = mask_util.frPyObjects(segms, height, width) - return rle - - -def segms_to_mask(segms, iscrowd, height, width): - print('segms: ', segms) - if iscrowd: - return [[0 for i in range(width)] for j in range(height)] - rle = segms_to_rle(segms, height, width) - mask = mask_util.decode(rle) - return mask - - -def flip_segms(segms, height, width): - """Left/right flip each mask in a list of masks.""" - - def _flip_poly(poly, width): - flipped_poly = np.array(poly) - flipped_poly[0::2] = width - np.array(poly[0::2]) - 1 - return flipped_poly.tolist() - - def _flip_rle(rle, height, width): - if 'counts' in rle and type(rle['counts']) == list: - # Magic RLE format handling painfully discovered by looking at the - # COCO API showAnns function. - rle = mask_util.frPyObjects([rle], height, width) - mask = mask_util.decode(rle) - mask = mask[:, ::-1, :] - rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8)) - return rle - - flipped_segms = [] - for segm in segms: - if is_poly(segm): - # Polygon format - flipped_segms.append([_flip_poly(poly, width) for poly in segm]) - else: - # RLE format - flipped_segms.append(_flip_rle(segm, height, width)) - return flipped_segms diff --git a/PaddleCV/rcnn/train.py b/PaddleCV/rcnn/train.py deleted file mode 100644 index e858bd95eb1a572df2af1560bbc6f378fbf4e7f7..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/train.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os - - -def set_paddle_flags(flags): - for key, value in flags.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -set_paddle_flags({ - 'FLAGS_conv_workspace_size_limit': 500, - 'FLAGS_eager_delete_tensor_gb': 0, # enable gc - 'FLAGS_memory_fraction_of_eager_deletion': 1, - 'FLAGS_fraction_of_gpu_memory_to_use': 0.98 -}) - -import sys -import numpy as np -import time -import shutil -from utility import parse_args, print_arguments, SmoothedValue, TrainingStats, now_time, check_gpu -import collections - -import paddle -import paddle.fluid as fluid -from paddle.fluid import profiler -import reader -import models.model_builder as model_builder -import models.resnet as resnet -from learning_rate import exponential_with_warmup_decay -from config import cfg -import dist_utils - -num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - - -def get_device_num(): - # NOTE(zcd): for multi-processe training, each process use one GPU card. 
- if num_trainers > 1: - return 1 - return fluid.core.get_cuda_device_count() - - -def train(): - learning_rate = cfg.learning_rate - image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size] - - if cfg.enable_ce: - fluid.default_startup_program().random_seed = 1000 - fluid.default_main_program().random_seed = 1000 - import random - random.seed(0) - np.random.seed(0) - - devices_num = get_device_num() - total_batch_size = devices_num * cfg.TRAIN.im_per_batch - - use_random = True - if cfg.enable_ce: - use_random = False - model = model_builder.RCNN( - add_conv_body_func=resnet.add_ResNet50_conv4_body, - add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head, - use_pyreader=cfg.use_pyreader, - use_random=use_random) - model.build_model(image_shape) - losses, keys = model.loss() - loss = losses[0] - fetch_list = losses - - boundaries = cfg.lr_steps - gamma = cfg.lr_gamma - step_num = len(cfg.lr_steps) - values = [learning_rate * (gamma**i) for i in range(step_num + 1)] - - lr = exponential_with_warmup_decay( - learning_rate=learning_rate, - boundaries=boundaries, - values=values, - warmup_iter=cfg.warm_up_iter, - warmup_factor=cfg.warm_up_factor) - optimizer = fluid.optimizer.Momentum( - learning_rate=lr, - regularization=fluid.regularizer.L2Decay(cfg.weight_decay), - momentum=cfg.momentum) - optimizer.minimize(loss) - fetch_list = fetch_list + [lr] - - for var in fetch_list: - var.persistable = True - - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - if cfg.pretrained_model: - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) - - fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) - - if cfg.parallel: - build_strategy = fluid.BuildStrategy() - build_strategy.memory_optimize = False - build_strategy.enable_inplace = True - exec_strategy = 
fluid.ExecutionStrategy() - exec_strategy.num_iteration_per_drop_scope = 10 - - if num_trainers > 1 and cfg.use_gpu: - dist_utils.prepare_for_multi_process(exe, build_strategy, - fluid.default_main_program()) - # NOTE: the process is fast when num_threads is 1 - # for multi-process training. - exec_strategy.num_threads = 1 - - train_exe = fluid.ParallelExecutor( - use_cuda=bool(cfg.use_gpu), - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - else: - train_exe = exe - - shuffle = True - if cfg.enable_ce: - shuffle = False - # NOTE: do not shuffle dataset when using multi-process training - shuffle_seed = None - if num_trainers > 1: - shuffle_seed = 1 - if cfg.use_pyreader: - train_reader = reader.train( - batch_size=cfg.TRAIN.im_per_batch, - total_batch_size=total_batch_size, - padding_total=cfg.TRAIN.padding_minibatch, - shuffle=shuffle, - shuffle_seed=shuffle_seed) - if num_trainers > 1: - assert shuffle_seed is not None, \ - "If num_trainers > 1, the shuffle_seed must be set, because " \ - "the order of batch data generated by reader " \ - "must be the same in the respective processes." - # NOTE: the order of batch data generated by batch_reader - # must be the same in the respective processes. 
- if num_trainers > 1: - train_reader = fluid.contrib.reader.distributed_batch_reader( - train_reader) - py_reader = model.py_reader - py_reader.decorate_paddle_reader(train_reader) - else: - if num_trainers > 1: shuffle = False - train_reader = reader.train( - batch_size=total_batch_size, shuffle=shuffle) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - - def save_model(postfix): - model_path = os.path.join(cfg.model_save_dir, postfix) - if os.path.isdir(model_path): - shutil.rmtree(model_path) - fluid.io.save_persistables(exe, model_path) - - def train_loop_pyreader(): - py_reader.start() - train_stats = TrainingStats(cfg.log_window, keys) - try: - start_time = time.time() - prev_start_time = start_time - for iter_id in range(cfg.max_iter): - prev_start_time = start_time - start_time = time.time() - outs = train_exe.run(fetch_list=[v.name for v in fetch_list]) - stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} - train_stats.update(stats) - logs = train_stats.log() - strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( - now_time(), iter_id, - np.mean(outs[-1]), logs, start_time - prev_start_time) - print(strs) - sys.stdout.flush() - if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0: - save_model("model_iter{}".format(iter_id)) - - #profiler tools, used for benchmark - if args.is_profiler and iter_id == 10: - profiler.start_profiler("All") - elif args.is_profiler and iter_id == 15: - profiler.stop_profiler("total", args.profiler_path) - return - - end_time = time.time() - total_time = end_time - start_time - last_loss = np.array(outs[0]).mean() - if cfg.enable_ce: - gpu_num = devices_num - epoch_idx = iter_id + 1 - loss = last_loss - print("kpis\teach_pass_duration_card%s\t%s" % - (gpu_num, total_time / epoch_idx)) - print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss)) - except (StopIteration, fluid.core.EOFException): - py_reader.reset() - - def train_loop(): - start_time = time.time() - prev_start_time = start_time - 
start = start_time - train_stats = TrainingStats(cfg.log_window, keys) - for iter_id, data in enumerate(train_reader()): - prev_start_time = start_time - start_time = time.time() - outs = train_exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data)) - stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} - train_stats.update(stats) - logs = train_stats.log() - strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( - now_time(), iter_id, - np.mean(outs[-1]), logs, start_time - prev_start_time) - print(strs) - sys.stdout.flush() - if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0: - save_model("model_iter{}".format(iter_id)) - if (iter_id + 1) == cfg.max_iter: - break - #profiler tools, used for benchmark - if args.is_profiler and iter_id == 10: - profiler.start_profiler("All") - elif args.is_profiler and iter_id == 15: - profiler.stop_profiler("total", args.profiler_path) - return - end_time = time.time() - total_time = end_time - start_time - last_loss = np.array(outs[0]).mean() - # only for ce - if cfg.enable_ce: - gpu_num = devices_num - epoch_idx = iter_id + 1 - loss = last_loss - print("kpis\teach_pass_duration_card%s\t%s" % - (gpu_num, total_time / epoch_idx)) - print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss)) - - if cfg.use_pyreader: - train_loop_pyreader() - else: - train_loop() - save_model('model_final') - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - check_gpu(args.use_gpu) - train() diff --git a/PaddleCV/rcnn/utility.py b/PaddleCV/rcnn/utility.py deleted file mode 100644 index c464d4efcc5fd5162269506a2863c873d7e71dbe..0000000000000000000000000000000000000000 --- a/PaddleCV/rcnn/utility.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -""" -Contains common utility functions. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import sys -import distutils.util -import numpy as np -import six -import collections -from collections import deque -import datetime -from paddle.fluid import core -import argparse -import functools -from config import * -import paddle.fluid as fluid - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. - """ - - def __init__(self, window_size): - self.deque = deque(maxlen=window_size) - - def add_value(self, value): - self.deque.append(value) - - def get_median_value(self): - return np.median(self.deque) - - -def now_time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') - - -class TrainingStats(object): - def __init__(self, window_size, stats_keys): - self.smoothed_losses_and_metrics = { - key: SmoothedValue(window_size) - for key in stats_keys - } - - def update(self, stats): - for k, v in self.smoothed_losses_and_metrics.items(): - v.add_value(stats[k]) - - def get(self, extras=None): - stats = collections.OrderedDict() - if extras: - for k, v in extras.items(): - stats[k] = v - for k, v in self.smoothed_losses_and_metrics.items(): - stats[k] = round(v.get_median_value(), 3) - - return stats - - def log(self, extras=None): - d = self.get(extras) - strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items()) - return strs - - -def parse_args(): - """return all args - """ - parser = argparse.ArgumentParser(description=__doc__) - add_arg = functools.partial(add_arguments, argparser=parser) - # yapf: disable - # ENV - add_arg('parallel', bool, True, "Whether use parallel.") - add_arg('use_gpu', bool, True, "Whether use GPU.") - add_arg('model_save_dir', str, 'output', "The path to save model.") - add_arg('pretrained_model', str, 'imagenet_resnet50_fusebn', "The init model path.") - add_arg('dataset', str, 'coco2017', "coco2014, coco2017.") - 
add_arg('class_num', int, 81, "Class number.") - add_arg('data_dir', str, 'dataset/coco', "The data root path.") - add_arg('use_pyreader', bool, True, "Use pyreader.") - add_arg('use_profile', bool, False, "Whether use profiler.") - add_arg('padding_minibatch',bool, False, - "If False, only resize image and not pad, image shape is different between" - " GPUs in one mini-batch. If True, image shape is the same in one mini-batch.") - #SOLVER - add_arg('learning_rate', float, 0.01, "Learning rate.") - add_arg('max_iter', int, 180000, "Iter number.") - add_arg('log_window', int, 20, "Log smooth window, set 1 for debug, set 20 for train.") - # RCNN - # RPN - add_arg('anchor_sizes', int, [32,64,128,256,512], "The size of anchors.") - add_arg('aspect_ratios', float, [0.5,1.0,2.0], "The ratio of anchors.") - add_arg('variance', float, [1.,1.,1.,1.], "The variance of anchors.") - add_arg('rpn_stride', float, [16.,16.], "Stride of the feature map that RPN is attached.") - add_arg('rpn_nms_thresh', float, 0.7, "NMS threshold used on RPN proposals") - - #NOTE: args for profiler, used for benchmark - add_arg('is_profiler', int, 0, "the profiler switch.(used for benchmark)") - add_arg('profiler_path', str, './', "the profiler output file path.(used for benchmark)") - - # TRAIN VAL INFER - add_arg('MASK_ON', bool, False, "Option for different models. If False, choose faster_rcnn. 
If True, choose mask_rcnn") - add_arg('im_per_batch', int, 1, "Minibatch size.") - add_arg('max_size', int, 1333, "The resized image height.") - add_arg('scales', int, [800], "The resized image height.") - add_arg('batch_size_per_im',int, 512, "fast rcnn head batch size") - add_arg('pixel_means', float, [102.9801, 115.9465, 122.7717], "pixel mean") - add_arg('nms_thresh', float, 0.5, "NMS threshold.") - add_arg('score_thresh', float, 0.05, "score threshold for NMS.") - add_arg('snapshot_stride', int, 10000, "save model every snapshot stride.") - # SINGLE EVAL AND DRAW - add_arg('draw_threshold', float, 0.8, "Confidence threshold to draw bbox.") - add_arg('image_path', str, 'dataset/coco/val2017', "The image path used to inference and visualize.") - # ce - parser.add_argument( - '--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') - # yapf: enable - args = parser.parse_args() - file_name = sys.argv[0] - if 'train' in file_name or 'profile' in file_name: - merge_cfg_from_args(args, 'train') - else: - merge_cfg_from_args(args, 'val') - return args - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=true in paddlepaddle - cpu version. - """ - err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. 
Set use_gpu as false in config file to run " \ - "model on CPU" - - try: - if use_gpu and not fluid.is_compiled_with_cuda(): - logger.error(err) - sys.exit(1) - except Exception as e: - pass diff --git a/PaddleCV/ssd/.gitignore b/PaddleCV/ssd/.gitignore deleted file mode 100644 index 404af33d9659de6c2c34a755475be5d0ad5948af..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/.gitignore +++ /dev/null @@ -1,24 +0,0 @@ -# saved model -model/ - -pretrained/ssd_mobilenet_v1_coco.tar.gz -pretrained/ssd_mobilenet_v1_coco -pretrained/mobilenet_v1_imagenet.tar.gz -pretrained/mobilenet_v1_imagenet - -# coco and pascalvoc data -data/coco/train2017/ -data/coco/train2014/ -data/coco/val2017/ -data/coco/val2014/ -data/coco/*.zip -data/coco/annotations/ -data/pascalvoc/VOCdevkit/ -data/pascalvoc/*.tar -data/pascalvoc/test.txt -data/pascalvoc/trainval.txt - -log* -*.log -ssd_mobilenet_v1_pascalvoc* -quant_model diff --git a/PaddleCV/ssd/.run_ce.sh b/PaddleCV/ssd/.run_ce.sh deleted file mode 100755 index 05ae3b5708963645e53c548e72755c489749c594..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/.run_ce.sh +++ /dev/null @@ -1,19 +0,0 @@ -###!/bin/bash -####This file is only used for continuous evaluation. - -export MKL_NUM_THREADS=1 -export OMP_NUM_THREADS=1 - -if [ ! -d "/root/.cache/paddle/dataset/pascalvoc" ];then - mkdir -p /root/.cache/paddle/dataset/pascalvoc - ./data/pascalvoc/download.sh - cp -r ./data/pascalvoc/. 
/home/.cache/paddle/dataset/pascalvoc -fi - -cudaid=${object_detection_cudaid:=0} -export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --epoc_num=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py - -cudaid=${object_detection_cudaid_m:=0,1,2,3} -export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python train.py --enable_ce=True --batch_size=64 --epoc_num=2 --data_dir=/root/.cache/paddle/dataset/pascalvoc/ | python _ce.py diff --git a/PaddleCV/ssd/README.md b/PaddleCV/ssd/README.md deleted file mode 100644 index f64a135ae9d27f08e1c9ebd7337d2ebc656e049b..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/README.md +++ /dev/null @@ -1,102 +0,0 @@ -**This model has been moved to [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), which includes more detection models.** - -## SSD Object Detection - -## Table of Contents -- [Introduction](#introduction) -- [Data Preparation](#data-preparation) -- [Train](#train) -- [Evaluate](#evaluate) -- [Infer and Visualize](#infer-and-visualize) -- [Released Model](#released-model) - -### Introduction - -[Single Shot MultiBox Detector (SSD)](https://arxiv.org/abs/1512.02325) framework for object detection can be categorized as a single stage detector. A single stage detector simplifies object detection as a regression problem, which directly predicts the bounding boxes and class probabilities without region proposal. SSD further makes improves by producing these predictions of different scales from different layers, as shown below. Six levels predictions are made in six different scale feature maps. And there are two 3x3 convolutional layers in each feature map, which predict category or a shape offset relative to the prior box(also called anchor), respectively. Thus, we get 38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732 detections per class. -

-
-The Single Shot MultiBox Detector (SSD) -

- -SSD is readily pluggable into a wide variant standard convolutional network, such as VGG, ResNet, or MobileNet, which is also called base network or backbone. In this tutorial we used [MobileNet](https://arxiv.org/abs/1704.04861). - -We also recommend users to take a look at the  [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122276) - -### Data Preparation - -Please download [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) at first, skip this step if you already have one. - -``` -cd data/pascalvoc -python download.py -``` - -The script `download.py` will also create training and testing file lists. - -### Train - -#### Download the Pre-trained Model. - -We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. Download MobileNet-v1 SSD: - -```bash -sh ./pretrained/download_coco.sh -``` - -**NOTE:** Windows users can download weights from link in `./pretrained/download_coco.sh`. - -Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). - - -#### Train on PASCAL VOC - -`train.py` is the main caller of the training module. Examples of usage are shown below. - ``` - python -u train.py --batch_size=64 --dataset=pascalvoc --pretrained_model=pretrained/ssd_mobilenet_v1_coco/ - ``` - - Set ```export CUDA_VISIBLE_DEVICES=0,1``` to specifiy the number of GPU you want to use. - - **Note**: set `--use_multiprocess=False` when training on **Windows**, since some problems need to be solved when using Python multiprocess to accelerate data processing. 
- - For more help on arguments: - - ``` - python train.py --help - ``` - -Data reader is defined in `reader.py`. All images will be resized to 300x300. In training stage, images are randomly distorted, expanded, cropped and flipped: - - distort: distort brightness, contrast, saturation, and hue. - - expand: put the original image into a larger expanded image which is initialized using image mean. - - crop: crop image with respect to different scale, aspect ratio, and overlap. - - flip: flip horizontally. - -We used RMSProp optimizer with mini-batch size 64 to train the MobileNet-SSD. The initial learning rate is 0.001, and was decayed at 40, 60, 80, 100 epochs with multiplier 0.5, 0.25, 0.1, 0.01, respectively. Weight decay is 0.00005. After 120 epochs we achieve 73.32% mAP under 11point metric. - -### Evaluate - -You can evaluate your trained model in different metrics like 11point, integral on both PASCAL VOC and COCO dataset. Note we set the default test list to the dataset's test/val list, you can use your own test list by setting ```--test_list``` args. - -`eval.py` is the main caller of the evaluating module. Examples of usage are shown below. -``` -python eval.py --dataset=pascalvoc --model_dir=model/best_model --data_dir=data/pascalvoc --test_list=test.txt -``` - -### Infer and Visualize -`infer.py` is the main caller of the inferring module. Examples of usage are shown below. -``` -python infer.py --dataset=pascalvoc --nms_threshold=0.45 --model_dir=model/best_model --image_path=./data/pascalvoc/VOCdevkit/VOC2007/JPEGImages/009963.jpg -``` -Below are the examples of running the inference and visualizing the model result. -

- - - -
-MobileNet-v1-SSD 300x300 Visualization Examples -

- - -### Released Model - - -| Model | Pre-trained Model | Training data | Test data | mAP | -|:------------------------:|:------------------:|:----------------:|:------------:|:----:| -|[MobileNet-v1-SSD 300x300](http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_pascalvoc.tar.gz) | COCO MobileNet SSD | VOC07+12 trainval| VOC07 test | 73.32% | diff --git a/PaddleCV/ssd/README_cn.md b/PaddleCV/ssd/README_cn.md deleted file mode 100644 index 982b567634907e5af09fd0403daf601ee10abf65..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/README_cn.md +++ /dev/null @@ -1,106 +0,0 @@ -**该项目已被迁移至[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), 这个项目包含了更多的检测模型。** - -## SSD 目标检测 - -## Table of Contents -- [简介](#简介) -- [数据准备](#数据准备) -- [模型训练](#模型训练) -- [模型评估](#模型评估) -- [模型预测以及可视化](#模型预测以及可视化) -- [模型发布](#模型发布) - -### 简介 - -[Single Shot MultiBox Detector (SSD)](https://arxiv.org/abs/1512.02325) 是一种单阶段的目标检测器。与两阶段的检测方法不同,单阶段目标检测并不进行区域推荐,而是直接从特征图回归出目标的边界框和分类概率。SSD 运用了这种单阶段检测的思想,并且对其进行改进:在不同尺度的特征图上检测对应尺度的目标。如下图所示,SSD 在六个尺度的特征图上进行了不同层级的预测。每个层级由两个3x3卷积分别对目标类别和边界框偏移进行回归。因此对于每个类别,SSD 的六个层级一共会产生 38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732 个检测结果。 -

-
-SSD 目标检测模型 -

- -SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、ResNet 或者 MobileNet,这些网络被称作检测器的基网络。在这个示例中我们使用 [MobileNet](https://arxiv.org/abs/1704.04861)。 - -同时推荐用户参考[ IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122276) - -### 数据准备 - - -请先使用下面的命令下载 [PASCAL VOC 数据集](http://host.robots.ox.ac.uk/pascal/VOC/): - -``` -cd data/pascalvoc -python download.py -``` - -`download.py` 脚本会自动创建训练和测试用的列表文件。 - - -### 模型训练 - -#### 下载预训练模型 - -我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。下载 MobileNet-v1 SSD: - -```bash -sh ./pretrained/download_coco.sh -``` - -**注意:** Windows用户可通过`./pretrained/download_coco.sh`中的链接直接下载和解压。 - -声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)。 - - -#### 训练 - -`train.py` 是训练模块的主要执行程序,调用示例如下: - ``` - python -u train.py --batch_size=64 --dataset=pascalvoc --pretrained_model=pretrained/ssd_mobilenet_v1_coco/ - ``` - - 可以通过设置 ```export CUDA_VISIBLE_DEVICES=0,1``` 指定想要使用的GPU数量。 - - **注意**: 在**Windows**机器上训练,需要设置 `--use_multiprocess=False`,因为在Windows上使用Python多进程加速训练时有错误。 - - 更多的可选参数见: - - ``` - python train.py --help - ``` - -数据的读取行为定义在 `reader.py` 中,所有的图片都会被缩放到300x300。在训练时还会对图片进行数据增强,包括随机扰动、扩张、翻转和裁剪: - - 扰动: 扰动图片亮度、对比度、饱和度和色相。 - - 扩张: 将原始图片放进一张使用像素均值填充(随后会在减均值操作中减掉)的扩张图中,再对此图进行裁剪、缩放和翻转。 - - 翻转: 水平翻转。 - - 裁剪: 根据缩放比例、长宽比例两个参数生成若干候选框,再依据这些候选框和标注框的面积交并比(IoU)挑选出符合要求的裁剪结果。 - -我们使用了 RMSProp 优化算法来训练 MobileNet-SSD,batch大小为64,权重衰减系数为0.00005,初始学习率为 0.001,并且在第40、60、80、100 轮时使用 0.5, 0.25, 0.1, 0.01乘子进行学习率衰减。在120轮训练后,11point评价标准下的mAP为73.32%。 - -### 模型评估 - -你可以使用11point、integral等指标在PASCAL VOC 数据集上评估训练好的模型。不失一般性,我们采用相应数据集的测试列表作为样例代码的默认列表,你也可以通过设置```--test_list```来指定自己的测试样本列表。 - -`eval.py`是评估模块的主要执行程序,调用示例如下: - -``` -python eval.py --dataset=pascalvoc 
--model_dir=model/best_model --data_dir=data/pascalvoc --test_list=test.txt -``` - -### 模型预测以及可视化 - -`infer.py`是预测及可视化模块的主要执行程序,调用示例如下: -``` -python infer.py --dataset=pascalvoc --nms_threshold=0.45 --model_dir=model/best_model --image_path=./data/pascalvoc/VOCdevkit/VOC2007/JPEGImages/009963.jpg -``` -下图可视化了模型的预测结果: -

- - - -
-MobileNet-v1-SSD 300x300 预测可视化 -

- - -### 模型发布 - - -| 模型 | 预训练模型 | 训练数据 | 测试数据 | mAP | -|:------------------------:|:------------------:|:----------------:|:------------:|:----:| -|[MobileNet-v1-SSD 300x300](http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_pascalvoc.tar.gz) | COCO MobileNet SSD | VOC07+12 trainval| VOC07 test | 73.32% | diff --git a/PaddleCV/ssd/README_quant.md b/PaddleCV/ssd/README_quant.md deleted file mode 100644 index 7ea7f7bd79d21ba34c84d1a1b48a5298837939ac..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/README_quant.md +++ /dev/null @@ -1,146 +0,0 @@ -## Quantization-aware training for SSD - -### Introduction - -The quantization-aware training used in this experiments is introduced in [fixed-point quantization desigin](https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/quantization/fixed_point_quantization.md). Since quantization-aware training is still an active area of research and experimentation, -here, we just give an simple quantization training usage in Fluid based on MobileNet-SSD model, and more other exeperiments are still needed, like how to quantization traning by considering fusing batch normalization and convolution/fully-connected layers, channel-wise quantization of weights and so on. 
- - -A Python transpiler is used to rewrite Fluid training program or evaluation program for quantization-aware training: - -```python - - #startup_prog = fluid.Program() - #train_prog = fluid.Program() - #loss = build_program( - # main_prog=train_prog, - # startup_prog=startup_prog, - # is_train=True) - #build_program( - # main_prog=test_prog, - # startup_prog=startup_prog, - # is_train=False) - #test_prog = test_prog.clone(for_test=True) - # above is an pseudo code - - transpiler = fluid.contrib.QuantizeTranspiler( - weight_bits=8, - activation_bits=8, - activation_quantize_type='abs_max', # or 'range_abs_max' - weight_quantize_type='abs_max') - # note, transpiler.training_transpile will rewrite train_prog - # startup_prog is needed since it needs to insert and initialize - # some state variable - transpiler.training_transpile(train_prog, startup_prog) - transpiler.training_transpile(test_prog, startup_prog) -``` - - According to above design, this transpiler inserts fake quantization and de-quantization operation for each convolution operation (including depthwise convolution operation) and fully-connected operation. These quantizations take affect on weights and activations. - - In the design, we introduce dynamic quantization and static quantization strategies for different activation quantization methods. In the expriments, when set `activation_quantize_type` to `abs_max`, it is dynamic quantization. That is to say, the quantization scale (maximum of absolute value) of activation will be calculated each mini-batch during inference. When set `activation_quantize_type` to `range_abs_max`, a quantization scale for inference period will be calculated during training. Following part will introduce how to train. - -### Quantization-aware training - - The training is fine-tuned on the well-trained MobileNet-SSD model. 
So download model at first: - - ``` - wget http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_pascalvoc.tar.gz - ``` - -- dynamic quantization: - - ```python - python main_quant.py \ - --data_dir=$PascalVOC_DIR$ \ - --mode='train' \ - --init_model=ssd_mobilenet_v1_pascalvoc \ - --act_quant_type='abs_max' \ - --epoc_num=20 \ - --learning_rate=0.0001 \ - --batch_size=64 \ - --model_save_dir=$OUTPUT_DIR$ - ``` - Since fine-tuned on a well-trained model, we use a small start learnng rate 0.0001, and train 20 epocs. - -- static quantization: - ```python - python main_quant.py \ - --data_dir=$PascalVOC_DIR$ \ - --mode='train' \ - --init_model=ssd_mobilenet_v1_pascalvoc \ - --act_quant_type='range_abs_max' \ - --epoc_num=80 \ - --learning_rate=0.001 \ - --lr_epochs=30,60 \ - --lr_decay_rates=1,0.1,0.01 \ - --batch_size=64 \ - --model_save_dir=$OUTPUT_DIR$ - ``` - Here, train 80 epocs, learning rate decays at 30 and 60 epocs by 0.1 every time. Users can adjust these hype-parameters. - -### Convert to inference model - - As described in the design documentation, the inference graph is a little different from training, the difference is the de-quantization operation is before or after conv/fc. This is equivalent in training due to linear operation of conv/fc and de-quantization and functions' commutative law. 
But for inference, it needs to convert the graph, `fluid.contrib.QuantizeTranspiler.freeze_program` is used to do this: - - ```python - #startup_prog = fluid.Program() - #test_prog = fluid.Program() - #test_py_reader, map_eval, nmsed_out, image = build_program( - # main_prog=test_prog, - # startup_prog=startup_prog, - # train_params=configs, - # is_train=False) - #test_prog = test_prog.clone(for_test=True) - #transpiler = fluid.contrib.QuantizeTranspiler(weight_bits=8, - # activation_bits=8, - # activation_quantize_type=act_quant_type, - # weight_quantize_type='abs_max') - #transpiler.training_transpile(test_prog, startup_prog) - #place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - #exe = fluid.Executor(place) - #exe.run(startup_prog) - - def if_exist(var): - return os.path.exists(os.path.join(init_model, var.name)) - fluid.io.load_vars(exe, init_model, main_program=test_prog, - predicate=if_exist) - # freeze the rewrited training program - # freeze after load parameters, it will quantized weights - transpiler.freeze_program(test_prog, place) - ``` - - Users can evaluate the converted model by: - - ``` - python main_quant.py \ - --data_dir=$PascalVOC_DIR$ \ - --mode='test' \ - --init_model=$MODLE_DIR$ \ - --model_save_dir=$MobileNet_SSD_8BIT_MODEL$ - ``` - - You also can check the 8-bit model by the inference scripts - - ``` - python main_quant.py \ - --mode='infer' \ - --init_model=$MobileNet_SSD_8BIT_MODEL$ \ - --confs_threshold=0.5 \ - --image_path='/data/PascalVOC/VOCdevkit/VOC2007/JPEGImages/002271.jpg' - ``` - See 002271.jpg for the visualized image with bbouding boxes. - - - **Note**, if you want to convert model to 8-bit, you should call `fluid.contrib.QuantizeTranspiler.convert_to_int8` to do this. But, now Paddle can't load 8-bit model to do inference. - -### Results - -Results of MobileNet-v1-SSD 300x300 model on PascalVOC dataset. 
- -| Model | mAP | -|:---------------------------------------:|:------------------:| -|Floating point: 32bit | 73.32% | -|Fixed point: 8bit, dynamic quantization | 72.77% | -|Fixed point: 8bit, static quantization | 72.45% | - - As mentioned above, other experiments, like how to quantization traning by considering fusing batch normalization and convolution/fully-connected layers, channel-wise quantization of weights, quantizated weights type with uint8 instead of int8 and so on. diff --git a/PaddleCV/ssd/_ce.py b/PaddleCV/ssd/_ce.py deleted file mode 100644 index 6f300e162b1c1940a2c8f1463953f0bcbeaa0a78..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/_ce.py +++ /dev/null @@ -1,72 +0,0 @@ -####this file is only used for continuous evaluation test! - -import os -import sys -sys.path.append(os.environ['ceroot']) -from kpi import CostKpi, DurationKpi, AccKpi - -#### NOTE kpi.py should shared in models in some way!!!! - -train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) -test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False) -train_speed_kpi = DurationKpi('train_speed', 0.1, 0, actived=True, unit_repr="s/epoch") -train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True) -test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=False) -train_speed_card4_kpi = DurationKpi('train_speed_card4', 0.1, 0, actived=True, unit_repr="s/epoch") - -tracking_kpis = [ - train_cost_kpi, - test_acc_kpi, - train_speed_kpi, - train_cost_card4_kpi, - test_acc_card4_kpi, - train_speed_card4_kpi, -] - - -def parse_log(log): - ''' - This method should be implemented by model developers. 
- - The suggestion: - - each line in the log should be key, value, for example: - - " - train_cost\t1.0 - test_cost\t1.0 - train_cost\t1.0 - train_cost\t1.0 - train_acc\t1.2 - " - ''' - #kpi_map = {} - for line in log.split('\n'): - fs = line.strip().split('\t') - print(fs) - if len(fs) == 3 and fs[0] == 'kpis': - print("-----%s" % fs) - kpi_name = fs[1] - kpi_value = float(fs[2]) - #kpi_map[kpi_name] = kpi_value - yield kpi_name, kpi_value - #return kpi_map - - -def log_to_ce(log): - kpi_tracker = {} - for kpi in tracking_kpis: - kpi_tracker[kpi.name] = kpi - - for (kpi_name, kpi_value) in parse_log(log): - print(kpi_name, kpi_value) - kpi_tracker[kpi_name].add_record(kpi_value) - kpi_tracker[kpi_name].persist() - - -if __name__ == '__main__': - log = sys.stdin.read() - print("*****") - print(log) - print("****") - log_to_ce(log) diff --git a/PaddleCV/ssd/data/coco/download.py b/PaddleCV/ssd/data/coco/download.py deleted file mode 100644 index 9df49bef6eab9d615e61e3cd429dcfdbeb5708ce..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/data/coco/download.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import os.path as osp -import sys -import zipfile -import logging - -from paddle.dataset.common import download - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -DATASETS = { - 'coco': [ - # coco2017 - ('http://images.cocodataset.org/zips/train2017.zip', - 'cced6f7f71b7629ddf16f17bbcfab6b2', ), - ('http://images.cocodataset.org/zips/val2017.zip', - '442b8da7639aecaf257c1dceb8ba8c80', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', - 'f4bbac642086de4f52a3fdda2de5fa2c', ), - # coco2014 - ('http://images.cocodataset.org/zips/train2014.zip', - '0da8c0bd3d6becc4dcb32757491aca88', ), - ('http://images.cocodataset.org/zips/val2014.zip', - 'a3d79f5ed8d289b7a7554ce06a5782b3', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', - '0a379cfc70b0e71301e0f377548639bd', ), - ], -} - - -def download_decompress_file(data_dir, url, md5): - logger.info("Downloading from {}".format(url)) - zip_file = download(url, data_dir, md5) - logger.info("Decompressing {}".format(zip_file)) - with zipfile.ZipFile(zip_file) as zf: - zf.extractall(path=data_dir) - os.remove(zip_file) - - -if __name__ == "__main__": - data_dir = osp.split(osp.realpath(sys.argv[0]))[0] - for name, infos in DATASETS.items(): - for info in infos: - download_decompress_file(data_dir, info[0], info[1]) - logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/ssd/data/pascalvoc/download.py b/PaddleCV/ssd/data/pascalvoc/download.py deleted file mode 100644 index 7e652c2936eb8705eb2ebba57bddddfa6ae53c58..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/data/pascalvoc/download.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import os.path as osp -import sys -import io -import re -import random -import tarfile -import logging - -from paddle.dataset.common import download - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -DATASETS = { - 'pascalvoc': [ - ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', - '6cd6e144f989b92b3379bac3b3de84fd', ), - ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', - 'c52e279531787c972589f7e41ab4ae64', ), - ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', - 'b6e924de25625d8de591ea690078ad9f', ), - ], -} - -devkit_dir = './VOCdevkit' -years = ['2007', '2012'] - - -def get_dir(devkit_dir, year, type): - return osp.join(devkit_dir, 'VOC' + year, type) - - -def walk_dir(devkit_dir, year): - filelist_dir = get_dir(devkit_dir, year, 'ImageSets/Main') - annotation_dir = get_dir(devkit_dir, year, 'Annotations') - img_dir = get_dir(devkit_dir, year, 'JPEGImages') - trainval_list = [] - test_list = [] - added = set() - - for _, _, files in os.walk(filelist_dir): - for fname in files: - img_ann_list = [] - if re.match('[a-z]+_trainval\.txt', fname): - img_ann_list = trainval_list - elif re.match('[a-z]+_test\.txt', fname): - img_ann_list = test_list - else: - continue - fpath = osp.join(filelist_dir, fname) - for line in io.open(fpath): - name_prefix = line.strip().split()[0] - if name_prefix in added: - continue - added.add(name_prefix) - ann_path = osp.join(annotation_dir, name_prefix + '.xml') - img_path = osp.join(img_dir, 
name_prefix + '.jpg') - assert os.path.isfile(ann_path), 'file %s not found.' % ann_path - assert os.path.isfile(img_path), 'file %s not found.' % img_path - img_ann_list.append((img_path, ann_path)) - - return trainval_list, test_list - - -def prepare_filelist(devkit_dir, years, output_dir): - trainval_list = [] - test_list = [] - for year in years: - trainval, test = walk_dir(devkit_dir, year) - trainval_list.extend(trainval) - test_list.extend(test) - random.shuffle(trainval_list) - with io.open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval: - for item in trainval_list: - ftrainval.write(item[0] + ' ' + item[1] + '\n') - - with io.open(osp.join(output_dir, 'test.txt'), 'w') as ftest: - for item in test_list: - ftest.write(item[0] + ' ' + item[1] + '\n') - - - -def download_decompress_file(data_dir, url, md5): - logger.info("Downloading from {}".format(url)) - tar_file = download(url, data_dir, md5) - logger.info("Decompressing {}".format(tar_file)) - with tarfile.open(tar_file) as tf: - tf.extractall(path=data_dir) - os.remove(tar_file) - - -if __name__ == "__main__": - data_dir = osp.split(osp.realpath(sys.argv[0]))[0] - for name, infos in DATASETS.items(): - for info in infos: - download_decompress_file(data_dir, info[0], info[1]) - if name == 'pascalvoc': - logger.info("create list for pascalvoc dataset.") - prepare_filelist(devkit_dir, years, data_dir) - logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/ssd/data/pascalvoc/label_list b/PaddleCV/ssd/data/pascalvoc/label_list deleted file mode 100644 index 87df23ce0aebcd5ab96fc91c868598c3333da59c..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/data/pascalvoc/label_list +++ /dev/null @@ -1,21 +0,0 @@ -background -aeroplane -bicycle -bird -boat -bottle -bus -car -cat -chair -cow -diningtable -dog -horse -motorbike -person -pottedplant -sheep -sofa -train -tvmonitor diff --git a/PaddleCV/ssd/eval.py b/PaddleCV/ssd/eval.py deleted file mode 100644 index 
cdcf04519919f9e6ea46a9604dc4093ac512c28a..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/eval.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import numpy as np -import argparse -import functools -import math - -import paddle -import paddle.fluid as fluid -import reader -from mobilenet_ssd import build_mobilenet_ssd -from utility import add_arguments, print_arguments, check_cuda - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('dataset', str, 'pascalvoc', "coco2014, coco2017, and pascalvoc.") -add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('data_dir', str, '', "The data root path.") -add_arg('test_list', str, '', "The testing data lists.") -add_arg('model_dir', str, '', "The model path.") -add_arg('nms_threshold', float, 0.45, "NMS threshold.") -add_arg('ap_version', str, '11point', "integral, 11point.") -add_arg('resize_h', int, 300, "The resized image height.") -add_arg('resize_w', int, 300, "The resized image height.") -add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will be subtracted.") #123.68 -add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 -add_arg('mean_value_R', float, 127.5, "Mean value for R 
channel which will be subtracted.") #103.94 -# yapf: enable - - -def build_program(main_prog, startup_prog, args, data_args): - image_shape = [3, data_args.resize_h, data_args.resize_w] - if 'coco' in data_args.dataset: - num_classes = 91 - elif 'pascalvoc' in data_args.dataset: - num_classes = 21 - - with fluid.program_guard(main_prog, startup_prog): - py_reader = fluid.layers.py_reader( - capacity=64, - shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]], - lod_levels=[0, 1, 1, 1], - dtypes=["float32", "float32", "int32", "int32"], - use_double_buffer=True) - with fluid.unique_name.guard(): - image, gt_box, gt_label, difficult = fluid.layers.read_file( - py_reader) - locs, confs, box, box_var = build_mobilenet_ssd(image, num_classes, - image_shape) - nmsed_out = fluid.layers.detection_output( - locs, confs, box, box_var, nms_threshold=args.nms_threshold) - with fluid.program_guard(main_prog): - map = fluid.metrics.DetectionMAP( - nmsed_out, - gt_label, - gt_box, - difficult, - num_classes, - overlap_threshold=0.5, - evaluate_difficult=False, - ap_version=args.ap_version) - return py_reader, map - - -def eval(args, data_args, test_list, batch_size, model_dir=None): - startup_prog = fluid.Program() - test_prog = fluid.Program() - test_py_reader, map_eval = build_program( - main_prog=test_prog, - startup_prog=startup_prog, - args=args, - data_args=data_args) - test_prog = test_prog.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - def if_exist(var): - return os.path.exists(os.path.join(model_dir, var.name)) - - fluid.io.load_vars( - exe, model_dir, main_program=test_prog, predicate=if_exist) - - test_reader = reader.test(data_args, test_list, batch_size=batch_size) - test_py_reader.decorate_paddle_reader(test_reader) - - _, accum_map = map_eval.get_map_var() - map_eval.reset(exe) - test_py_reader.start() - try: - batch_id = 0 - while True: - test_map, = 
exe.run(test_prog, fetch_list=[accum_map]) - if batch_id % 10 == 0: - print("Batch {0}, map {1}".format(batch_id, test_map)) - batch_id += 1 - except (fluid.core.EOFException, StopIteration): - test_py_reader.reset() - print("Test model {0}, map {1}".format(model_dir, test_map)) - - -if __name__ == '__main__': - args = parser.parse_args() - print_arguments(args) - - check_cuda(args.use_gpu) - - data_dir = 'data/pascalvoc' - test_list = 'test.txt' - label_file = 'label_list' - - if not os.path.exists(args.model_dir): - raise ValueError("The model path [%s] does not exist." % - (args.model_dir)) - if 'coco' in args.dataset: - data_dir = 'data/coco' - if '2014' in args.dataset: - test_list = 'annotations/instances_val2014.json' - elif '2017' in args.dataset: - test_list = 'annotations/instances_val2017.json' - - data_args = reader.Settings( - dataset=args.dataset, - data_dir=args.data_dir if len(args.data_dir) > 0 else data_dir, - label_file=label_file, - resize_h=args.resize_h, - resize_w=args.resize_w, - mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R], - apply_distort=False, - apply_expand=False, - ap_version=args.ap_version) - eval( - args, - data_args=data_args, - test_list=args.test_list if len(args.test_list) > 0 else test_list, - batch_size=args.batch_size, - model_dir=args.model_dir) diff --git a/PaddleCV/ssd/eval_coco_map.py b/PaddleCV/ssd/eval_coco_map.py deleted file mode 100644 index c008430776e33ccfaea910b39776292783630eb2..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/eval_coco_map.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import io -import six -import time -import numpy as np -import argparse -import functools - -import paddle -import paddle.fluid as fluid -import reader -from mobilenet_ssd import build_mobilenet_ssd -from utility import add_arguments, print_arguments - -# A special mAP metric for COCO dataset, which averages AP in different IoUs. -# To use this eval_cocoMAP.py, [cocoapi](https://github.com/cocodataset/cocoapi) is needed. -import json -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('dataset', str, 'coco2014', "coco2014, coco2017.") -add_arg('batch_size', int, 32, "Minibatch size.") -add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('data_dir', str, '', "The data root path.") -add_arg('test_list', str, '', "The testing data lists.") -add_arg('model_dir', str, '', "The model path.") -add_arg('nms_threshold', float, 0.5, "NMS threshold.") -add_arg('ap_version', str, 'cocoMAP', "cocoMAP.") -add_arg('resize_h', int, 300, "The resized image height.") -add_arg('resize_w', int, 300, "The resized image height.") -add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will be subtracted.") #123.68 -add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 -add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 -# yapf: enable - - -def eval(args, data_args, test_list, 
batch_size, model_dir=None): - image_shape = [3, data_args.resize_h, data_args.resize_w] - num_classes = 91 - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - gt_box = fluid.layers.data( - name='gt_box', shape=[4], dtype='float32', lod_level=1) - gt_label = fluid.layers.data( - name='gt_label', shape=[1], dtype='int32', lod_level=1) - gt_iscrowd = fluid.layers.data( - name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1) - gt_image_info = fluid.layers.data( - name='gt_image_id', shape=[3], dtype='int32') - - locs, confs, box, box_var = build_mobilenet_ssd(image, - num_classes, image_shape) - nmsed_out = fluid.layers.detection_output( - locs, confs, box, box_var, nms_threshold=args.nms_threshold) - loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var) - loss = fluid.layers.reduce_sum(loss) - - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - # yapf: disable - if model_dir: - def if_exist(var): - return os.path.exists(os.path.join(model_dir, var.name)) - fluid.io.load_vars(exe, model_dir, predicate=if_exist) - # yapf: enable - test_reader = reader.test(data_args, test_list, batch_size) - feeder = fluid.DataFeeder( - place=place, - feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info]) - - def get_dt_res(nmsed_out_v, data): - dts_res = [] - lod = nmsed_out_v[0].lod()[0] - nmsed_out_v = np.array(nmsed_out_v[0]) - real_batch_size = min(batch_size, len(data)) - assert (len(lod) == real_batch_size + 1), \ - "Error Lod Tensor offset dimension. Lod({}) vs. 
batch_size({})".format(len(lod), batch_size) - k = 0 - for i in range(real_batch_size): - dt_num_this_img = lod[i + 1] - lod[i] - image_id = int(data[i][4][0]) - image_width = int(data[i][4][1]) - image_height = int(data[i][4][2]) - for j in range(dt_num_this_img): - dt = nmsed_out_v[k] - k = k + 1 - category_id, score, xmin, ymin, xmax, ymax = dt.tolist() - xmin = max(min(xmin, 1.0), 0.0) * image_width - ymin = max(min(ymin, 1.0), 0.0) * image_height - xmax = max(min(xmax, 1.0), 0.0) * image_width - ymax = max(min(ymax, 1.0), 0.0) * image_height - w = xmax - xmin - h = ymax - ymin - bbox = [xmin, ymin, w, h] - dt_res = { - 'image_id': image_id, - 'category_id': category_id, - 'bbox': bbox, - 'score': score - } - dts_res.append(dt_res) - return dts_res - - def test(): - dts_res = [] - - for batch_id, data in enumerate(test_reader()): - nmsed_out_v = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[nmsed_out], - return_numpy=False) - if batch_id % 20 == 0: - print("Batch {0}".format(batch_id)) - dts_res += get_dt_res(nmsed_out_v, data) - - with io.open("detection_result.json", 'w') as outfile: - encode_func = unicode if six.PY2 else str - outfile.write(encode_func(json.dumps(dts_res))) - print("start evaluate using coco api") - cocoGt = COCO(os.path.join(data_args.data_dir, test_list)) - cocoDt = cocoGt.loadRes("detection_result.json") - cocoEval = COCOeval(cocoGt, cocoDt, "bbox") - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - - test() - - -if __name__ == '__main__': - args = parser.parse_args() - print_arguments(args) - assert args.dataset in ['coco2014', 'coco2017'] - data_dir = './data/coco' - if '2014' in args.dataset: - test_list = 'annotations/instances_val2014.json' - elif '2017' in args.dataset: - test_list = 'annotations/instances_val2017.json' - - data_args = reader.Settings( - dataset=args.dataset, - data_dir=args.data_dir if len(args.data_dir) > 0 else data_dir, - label_file='', - 
resize_h=args.resize_h, - resize_w=args.resize_w, - mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R], - apply_distort=False, - apply_expand=False, - ap_version=args.ap_version) - eval( - args, - data_args=data_args, - test_list=args.test_list if len(args.test_list) > 0 else test_list, - batch_size=args.batch_size, - model_dir=args.model_dir) diff --git a/PaddleCV/ssd/image_util.py b/PaddleCV/ssd/image_util.py deleted file mode 100644 index ed4bf6f682d358f6afe082300aaae2737456798b..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/image_util.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from PIL import Image, ImageEnhance, ImageDraw -from PIL import ImageFile -import numpy as np -import random -import math - -ImageFile.LOAD_TRUNCATED_IMAGES = True #otherwise IOError raised image file is truncated - - -class sampler(): - def __init__(self, max_sample, max_trial, min_scale, max_scale, - min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap, - max_jaccard_overlap): - self.max_sample = max_sample - self.max_trial = max_trial - self.min_scale = min_scale - self.max_scale = max_scale - self.min_aspect_ratio = min_aspect_ratio - self.max_aspect_ratio = max_aspect_ratio - self.min_jaccard_overlap = min_jaccard_overlap - self.max_jaccard_overlap = max_jaccard_overlap - - -class bbox(): - def __init__(self, xmin, ymin, xmax, ymax): - self.xmin = xmin - self.ymin = ymin - self.xmax = xmax - self.ymax = ymax - - -def bbox_area(src_bbox): - width = src_bbox.xmax - src_bbox.xmin - height = src_bbox.ymax - src_bbox.ymin - return width * height - - -def generate_sample(sampler): - scale = np.random.uniform(sampler.min_scale, sampler.max_scale) - aspect_ratio = np.random.uniform(sampler.min_aspect_ratio, - sampler.max_aspect_ratio) - aspect_ratio = max(aspect_ratio, (scale**2.0)) - aspect_ratio = min(aspect_ratio, 1 / (scale**2.0)) - - bbox_width = scale * (aspect_ratio**0.5) - bbox_height = scale / (aspect_ratio**0.5) - xmin_bound = 1 - bbox_width - ymin_bound = 1 - bbox_height - xmin = np.random.uniform(0, xmin_bound) - ymin = np.random.uniform(0, ymin_bound) - xmax = xmin + bbox_width - ymax = ymin + bbox_height - sampled_bbox = bbox(xmin, ymin, xmax, ymax) - return sampled_bbox - - -def jaccard_overlap(sample_bbox, object_bbox): - if sample_bbox.xmin >= object_bbox.xmax or \ - sample_bbox.xmax <= object_bbox.xmin or \ - sample_bbox.ymin >= object_bbox.ymax or \ - sample_bbox.ymax <= object_bbox.ymin: - return 0 - intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin) - intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin) - intersect_xmax = 
min(sample_bbox.xmax, object_bbox.xmax) - intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax) - intersect_size = (intersect_xmax - intersect_xmin) * ( - intersect_ymax - intersect_ymin) - sample_bbox_size = bbox_area(sample_bbox) - object_bbox_size = bbox_area(object_bbox) - overlap = intersect_size / ( - sample_bbox_size + object_bbox_size - intersect_size) - return overlap - - -def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0: - return True - for i in range(len(bbox_labels)): - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - overlap = jaccard_overlap(sample_bbox, object_bbox) - if sampler.min_jaccard_overlap != 0 and \ - overlap < sampler.min_jaccard_overlap: - continue - if sampler.max_jaccard_overlap != 0 and \ - overlap > sampler.max_jaccard_overlap: - continue - return True - return False - - -def generate_batch_samples(batch_sampler, bbox_labels): - sampled_bbox = [] - index = [] - c = 0 - for sampler in batch_sampler: - found = 0 - for i in range(sampler.max_trial): - if found >= sampler.max_sample: - break - sample_bbox = generate_sample(sampler) - if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels): - sampled_bbox.append(sample_bbox) - found = found + 1 - index.append(c) - c = c + 1 - return sampled_bbox - - -def clip_bbox(src_bbox): - src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0) - src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0) - src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0) - src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0) - return src_bbox - - -def meet_emit_constraint(src_bbox, sample_bbox): - center_x = (src_bbox.xmax + src_bbox.xmin) / 2 - center_y = (src_bbox.ymax + src_bbox.ymin) / 2 - if center_x >= sample_bbox.xmin and \ - center_x <= sample_bbox.xmax and \ - center_y >= sample_bbox.ymin and \ - center_y <= sample_bbox.ymax: - return True - return False - - -def 
transform_labels(bbox_labels, sample_bbox): - proj_bbox = bbox(0, 0, 0, 0) - sample_labels = [] - for i in range(len(bbox_labels)): - sample_label = [] - object_bbox = bbox(bbox_labels[i][1], bbox_labels[i][2], - bbox_labels[i][3], bbox_labels[i][4]) - if not meet_emit_constraint(object_bbox, sample_bbox): - continue - sample_width = sample_bbox.xmax - sample_bbox.xmin - sample_height = sample_bbox.ymax - sample_bbox.ymin - proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width - proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height - proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width - proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height - proj_bbox = clip_bbox(proj_bbox) - if bbox_area(proj_bbox) > 0: - sample_label.append(bbox_labels[i][0]) - sample_label.append(float(proj_bbox.xmin)) - sample_label.append(float(proj_bbox.ymin)) - sample_label.append(float(proj_bbox.xmax)) - sample_label.append(float(proj_bbox.ymax)) - #sample_label.append(bbox_labels[i][5]) - sample_label = sample_label + bbox_labels[i][5:] - sample_labels.append(sample_label) - return sample_labels - - -def crop_image(img, bbox_labels, sample_bbox, image_width, image_height): - sample_bbox = clip_bbox(sample_bbox) - xmin = int(sample_bbox.xmin * image_width) - xmax = int(sample_bbox.xmax * image_width) - ymin = int(sample_bbox.ymin * image_height) - ymax = int(sample_bbox.ymax * image_height) - sample_img = img[ymin:ymax, xmin:xmax] - sample_labels = transform_labels(bbox_labels, sample_bbox) - return sample_img, sample_labels - - -def random_brightness(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._brightness_prob: - delta = np.random.uniform(-settings._brightness_delta, - settings._brightness_delta) + 1 - img = ImageEnhance.Brightness(img).enhance(delta) - return img - - -def random_contrast(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._contrast_prob: - delta = 
np.random.uniform(-settings._contrast_delta, - settings._contrast_delta) + 1 - img = ImageEnhance.Contrast(img).enhance(delta) - return img - - -def random_saturation(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._saturation_prob: - delta = np.random.uniform(-settings._saturation_delta, - settings._saturation_delta) + 1 - img = ImageEnhance.Color(img).enhance(delta) - return img - - -def random_hue(img, settings): - prob = np.random.uniform(0, 1) - if prob < settings._hue_prob: - delta = np.random.uniform(-settings._hue_delta, settings._hue_delta) - img_hsv = np.array(img.convert('HSV')) - img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta - img = Image.fromarray(img_hsv, mode='HSV').convert('RGB') - return img - - -def distort_image(img, settings): - prob = np.random.uniform(0, 1) - # Apply different distort order - if prob > 0.5: - img = random_brightness(img, settings) - img = random_contrast(img, settings) - img = random_saturation(img, settings) - img = random_hue(img, settings) - else: - img = random_brightness(img, settings) - img = random_saturation(img, settings) - img = random_hue(img, settings) - img = random_contrast(img, settings) - return img - - -def expand_image(img, bbox_labels, img_width, img_height, settings): - prob = np.random.uniform(0, 1) - if prob < settings._expand_prob: - if settings._expand_max_ratio - 1 >= 0.01: - expand_ratio = np.random.uniform(1, settings._expand_max_ratio) - height = int(img_height * expand_ratio) - width = int(img_width * expand_ratio) - h_off = math.floor(np.random.uniform(0, height - img_height)) - w_off = math.floor(np.random.uniform(0, width - img_width)) - expand_bbox = bbox(-w_off / img_width, -h_off / img_height, - (width - w_off) / img_width, - (height - h_off) / img_height) - expand_img = np.ones((height, width, 3)) - expand_img = np.uint8(expand_img * np.squeeze(settings._img_mean)) - expand_img = Image.fromarray(expand_img) - expand_img.paste(img, (int(w_off), int(h_off))) - bbox_labels = 
transform_labels(bbox_labels, expand_bbox) - return expand_img, bbox_labels, width, height - return img, bbox_labels, img_width, img_height diff --git a/PaddleCV/ssd/images/009943.jpg b/PaddleCV/ssd/images/009943.jpg deleted file mode 100644 index d6262f97052aa7d82068e7d01f4d9982fcf0d3a9..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/009943.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/009956.jpg b/PaddleCV/ssd/images/009956.jpg deleted file mode 100644 index 320d3e251782e946395e7fcadbef051bc2e94bee..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/009956.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/009960.jpg b/PaddleCV/ssd/images/009960.jpg deleted file mode 100644 index 2f73d3d6f1956b1fa9ae1aba3b5d516a53f26b8f..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/009960.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/009962.jpg b/PaddleCV/ssd/images/009962.jpg deleted file mode 100644 index 182d6677bb80d94c5e7e4db3bf6654d3c064566c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/009962.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/COCO_val2014_000000000139.jpg b/PaddleCV/ssd/images/COCO_val2014_000000000139.jpg deleted file mode 100644 index 203a5273c134bb78bcb860c832bde2e2f49a42f2..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/COCO_val2014_000000000139.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/COCO_val2014_000000000785.jpg b/PaddleCV/ssd/images/COCO_val2014_000000000785.jpg deleted file mode 100644 index 5107ace65f5aedd67ee81317f24c9186441bfdb5..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/COCO_val2014_000000000785.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/COCO_val2014_000000000885.jpg b/PaddleCV/ssd/images/COCO_val2014_000000000885.jpg deleted file mode 100644 index 
3aa5a741a31445454dfb3c3a8d2fa24948aca2c4..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/COCO_val2014_000000000885.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/COCO_val2014_000000142324.jpg b/PaddleCV/ssd/images/COCO_val2014_000000142324.jpg deleted file mode 100644 index 9f9e5b6246eaace56b13012b96f42ec0e06c0882..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/COCO_val2014_000000142324.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/COCO_val2014_000000144003.jpg b/PaddleCV/ssd/images/COCO_val2014_000000144003.jpg deleted file mode 100644 index 1f17bbc667d92dfbc4c3b4f17fc7b3715e7ffc1b..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/COCO_val2014_000000144003.jpg and /dev/null differ diff --git a/PaddleCV/ssd/images/SSD_paper_figure.jpg b/PaddleCV/ssd/images/SSD_paper_figure.jpg deleted file mode 100644 index 4a180341ceffd6a4c3446994203f1a3adc4c6796..0000000000000000000000000000000000000000 Binary files a/PaddleCV/ssd/images/SSD_paper_figure.jpg and /dev/null differ diff --git a/PaddleCV/ssd/infer.py b/PaddleCV/ssd/infer.py deleted file mode 100644 index e5fc2a2fa16770aa03e0257a11c92b84b6f5dd72..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/infer.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import time -import numpy as np -import argparse -import functools -from PIL import Image -from PIL import ImageDraw -from PIL import ImageFont - -import paddle -import paddle.fluid as fluid -import reader -from mobilenet_ssd import build_mobilenet_ssd -from utility import add_arguments, print_arguments, check_cuda - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('dataset', str, 'pascalvoc', "coco and pascalvoc.") -add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('image_path', str, '', "The image used to inference and visualize.") -add_arg('model_dir', str, '', "The model path.") -add_arg('nms_threshold', float, 0.45, "NMS threshold.") -add_arg('confs_threshold', float, 0.5, "Confidence threshold to draw bbox.") -add_arg('resize_h', int, 300, "The resized image height.") -add_arg('resize_w', int, 300, "The resized image height.") -add_arg('mean_value_B', float, 127.5, "Mean value for B channel which will be subtracted.") #123.68 -add_arg('mean_value_G', float, 127.5, "Mean value for G channel which will be subtracted.") #116.78 -add_arg('mean_value_R', float, 127.5, "Mean value for R channel which will be subtracted.") #103.94 -# yapf: enable - - -def infer(args, data_args, image_path, model_dir): - image_shape = [3, data_args.resize_h, data_args.resize_w] - if 'coco' in data_args.dataset: - num_classes = 91 - # cocoapi - from pycocotools.coco import COCO - from pycocotools.cocoeval import COCOeval - label_fpath = os.path.join(data_dir, label_file) - coco = COCO(label_fpath) - category_ids = coco.getCatIds() - label_list = { - item['id']: item['name'] - for item in coco.loadCats(category_ids) - } - label_list[0] = ['background'] - elif 'pascalvoc' in data_args.dataset: - num_classes = 21 - label_list = data_args.label_list - - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - locs, confs, box, box_var = 
build_mobilenet_ssd(image, num_classes, - image_shape) - nmsed_out = fluid.layers.detection_output( - locs, confs, box, box_var, nms_threshold=args.nms_threshold) - - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - # yapf: disable - if model_dir: - def if_exist(var): - return os.path.exists(os.path.join(model_dir, var.name)) - fluid.io.load_vars(exe, model_dir, predicate=if_exist) - # yapf: enable - infer_reader = reader.infer(data_args, image_path) - feeder = fluid.DataFeeder(place=place, feed_list=[image]) - - data = infer_reader() - - # switch network to test mode (i.e. batch norm test mode) - test_program = fluid.default_main_program().clone(for_test=True) - nmsed_out_v, = exe.run(test_program, - feed=feeder.feed([[data]]), - fetch_list=[nmsed_out], - return_numpy=False) - nmsed_out_v = np.array(nmsed_out_v) - draw_bounding_box_on_image(image_path, nmsed_out_v, args.confs_threshold, - label_list) - - -def draw_bounding_box_on_image(image_path, nms_out, confs_threshold, - label_list): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - im_width, im_height = image.size - - for dt in nms_out: - if dt[1] < confs_threshold: - continue - category_id = dt[0] - bbox = dt[2:] - xmin, ymin, xmax, ymax = clip_bbox(dt[2:]) - (left, right, top, bottom) = (xmin * im_width, xmax * im_width, - ymin * im_height, ymax * im_height) - draw.line( - [(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=4, - fill='red') - if image.mode == 'RGB': - draw.text((left, top), label_list[int(category_id)], (255, 255, 0)) - image_name = image_path.split('/')[-1] - print("image with bbox drawed saved as {}".format(image_name)) - image.save(image_name) - - -def clip_bbox(bbox): - xmin = max(min(bbox[0], 1.), 0.) - ymin = max(min(bbox[1], 1.), 0.) - xmax = max(min(bbox[2], 1.), 0.) - ymax = max(min(bbox[3], 1.), 0.) 
- return xmin, ymin, xmax, ymax - - -if __name__ == '__main__': - args = parser.parse_args() - print_arguments(args) - - check_cuda(args.use_gpu) - - data_dir = 'data/pascalvoc' - label_file = 'label_list' - - if not os.path.exists(args.model_dir): - raise ValueError("The model path [%s] does not exist." % - (args.model_dir)) - if 'coco' in args.dataset: - data_dir = 'data/coco' - label_file = 'annotations/instances_val2014.json' - - data_args = reader.Settings( - dataset=args.dataset, - data_dir=data_dir, - label_file=label_file, - resize_h=args.resize_h, - resize_w=args.resize_w, - mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R], - apply_distort=False, - apply_expand=False, - ap_version='') - infer( - args, - data_args=data_args, - image_path=args.image_path, - model_dir=args.model_dir) diff --git a/PaddleCV/ssd/main_quant.py b/PaddleCV/ssd/main_quant.py deleted file mode 100644 index 59d43ba98080e5479bac51a2e91bad8d2283c053..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/main_quant.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -import time -import numpy as np -import argparse -import functools -import shutil -import math -import multiprocessing - -import paddle -import paddle.fluid as fluid -import reader -from mobilenet_ssd import mobile_net -from utility import add_arguments, print_arguments -from train import build_program -from train import train_parameters -from infer import draw_bounding_box_on_image - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('learning_rate', float, 0.0001, "Learning rate.") -add_arg('batch_size', int, 64, "Minibatch size.") -add_arg('epoc_num', int, 20, "Epoch number.") -add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('parallel', bool, True, "Whether train in parallel on multi-devices.") -add_arg('model_save_dir', str, 'quant_model', "The path to save model.") -add_arg('init_model', str, 'ssd_mobilenet_v1_pascalvoc', "The init model path.") -add_arg('ap_version', str, '11point', "mAP version can be integral or 11point.") -add_arg('image_shape', str, '3,300,300', "Input image shape.") -add_arg('mean_BGR', str, '127.5,127.5,127.5', "Mean value for B,G,R channel which will be subtracted.") -add_arg('lr_epochs', str, '30,60', "The learning decay steps.") -add_arg('lr_decay_rates', str, '1,0.1,0.01', "The learning decay rates for each step.") -add_arg('data_dir', str, 'data/pascalvoc', "Data directory") -add_arg('act_quant_type', str, 'abs_max', "Quantize type of activation, whicn can be abs_max or range_abs_max") -add_arg('image_path', str, '', "The image used to inference and visualize.") -add_arg('confs_threshold', float, 0.5, "Confidence threshold to draw bbox.") -add_arg('mode', str, 'train', "Job mode can be one of ['train', 'test', 'infer'].") -#yapf: enable - -def test(exe, test_prog, map_eval, test_py_reader): - _, accum_map = map_eval.get_map_var() - map_eval.reset(exe) - test_py_reader.start() - try: - batch = 0 - while True: - 
test_map, = exe.run(test_prog, fetch_list=[accum_map]) - if batch % 10 == 0: - print("Batch {0}, map {1}".format(batch, test_map)) - batch += 1 - except fluid.core.EOFException: - test_py_reader.reset() - finally: - test_py_reader.reset() - print("Test map {0}".format(test_map)) - return test_map - - -def save_model(exe, main_prog, model_save_dir, postfix): - model_path = os.path.join(model_save_dir, postfix) - if os.path.isdir(model_path): - shutil.rmtree(model_path) - fluid.io.save_persistables(exe, model_path, main_program=main_prog) - - -def train(args, - data_args, - train_params, - train_file_list, - val_file_list): - - model_save_dir = args.model_save_dir - init_model = args.init_model - epoc_num = args.epoc_num - use_gpu = args.use_gpu - parallel = args.parallel - is_shuffle = True - act_quant_type = args.act_quant_type - - if use_gpu: - devices_num = fluid.core.get_cuda_device_count() - else: - devices_num = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) - - batch_size = train_params['batch_size'] - batch_size_per_device = batch_size // devices_num - num_workers = 4 - - startup_prog = fluid.Program() - train_prog = fluid.Program() - test_prog = fluid.Program() - - train_py_reader, loss = build_program( - main_prog=train_prog, - startup_prog=startup_prog, - train_params=train_params, - is_train=True) - test_py_reader, map_eval, _, _ = build_program( - main_prog=test_prog, - startup_prog=startup_prog, - train_params=train_params, - is_train=False) - - test_prog = test_prog.clone(for_test=True) - - transpiler = fluid.contrib.QuantizeTranspiler(weight_bits=8, - activation_bits=8, - activation_quantize_type=act_quant_type, - weight_quantize_type='abs_max') - - transpiler.training_transpile(train_prog, startup_prog) - transpiler.training_transpile(test_prog, startup_prog) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - if init_model: - print('Load init model %s.' 
% init_model) - def if_exist(var): - return os.path.exists(os.path.join(init_model, var.name)) - fluid.io.load_vars(exe, init_model, main_program=train_prog, - predicate=if_exist) - else: - print('There is no init model.') - - if parallel: - train_exe = fluid.ParallelExecutor(main_program=train_prog, - use_cuda=True if use_gpu else False, loss_name=loss.name) - - train_reader = reader.train(data_args, - train_file_list, - batch_size_per_device, - shuffle=is_shuffle, - num_workers=num_workers) - test_reader = reader.test(data_args, val_file_list, batch_size) - train_py_reader.decorate_paddle_reader(train_reader) - test_py_reader.decorate_paddle_reader(test_reader) - - train_py_reader.start() - best_map = 0. - for epoc in range(epoc_num): - if epoc == 0: - # test quantized model without quantization-aware training. - test_map = test(exe, test_prog, map_eval, test_py_reader) - batch = 0 - train_py_reader.start() - while True: - try: - # train - start_time = time.time() - if parallel: - outs = train_exe.run(fetch_list=[loss.name]) - else: - outs = exe.run(train_prog, fetch_list=[loss]) - end_time = time.time() - avg_loss = np.mean(np.array(outs[0])) - if batch % 10 == 0: - print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format( - epoc , batch, avg_loss, end_time - start_time)) - except (fluid.core.EOFException, StopIteration): - train_reader().close() - train_py_reader.reset() - break - test_map = test(exe, test_prog, map_eval, test_py_reader) - save_model(exe, train_prog, model_save_dir, str(epoc)) - if test_map > best_map: - best_map = test_map - save_model(exe, train_prog, model_save_dir, 'best_map') - print("Best test map {0}".format(best_map)) - - -def eval(args, data_args, configs, val_file_list): - init_model = args.init_model - use_gpu = args.use_gpu - act_quant_type = args.act_quant_type - model_save_dir = args.model_save_dir - - batch_size = configs['batch_size'] - batch_size_per_device = batch_size - - startup_prog = fluid.Program() - test_prog = 
fluid.Program() - test_py_reader, map_eval, nmsed_out, image = build_program( - main_prog=test_prog, - startup_prog=startup_prog, - train_params=configs, - is_train=False) - test_prog = test_prog.clone(for_test=True) - - transpiler = fluid.contrib.QuantizeTranspiler(weight_bits=8, - activation_bits=8, - activation_quantize_type=act_quant_type, - weight_quantize_type='abs_max') - transpiler.training_transpile(test_prog, startup_prog) - - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - def if_exist(var): - return os.path.exists(os.path.join(init_model, var.name)) - fluid.io.load_vars(exe, init_model, main_program=test_prog, - predicate=if_exist) - - # freeze after load parameters - transpiler.freeze_program(test_prog, place) - - test_reader = reader.test(data_args, val_file_list, batch_size) - test_py_reader.decorate_paddle_reader(test_reader) - - test_map = test(exe, test_prog, map_eval, test_py_reader) - print("Test model {0}, map {1}".format(init_model, test_map)) - # convert model to 8-bit before saving, but now Paddle can't load - # the 8-bit model to do inference. 
- # transpiler.convert_to_int8(test_prog, place) - fluid.io.save_inference_model(model_save_dir, [image.name], - [nmsed_out], exe, test_prog) - - -def infer(args, data_args): - model_dir = args.init_model - image_path = args.image_path - confs_threshold = args.confs_threshold - - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed , fetch] = fluid.io.load_inference_model( - dirname=model_dir, - executor=exe, - model_filename='__model__') - - #print(np.array(fluid.global_scope().find_var('conv2d_20.w_0').get_tensor())) - #print(np.max(np.array(fluid.global_scope().find_var('conv2d_20.w_0').get_tensor()))) - infer_reader = reader.infer(data_args, image_path) - data = infer_reader() - data = data.reshape((1,) + data.shape) - outs = exe.run(inference_program, - feed={feed[0]: data}, - fetch_list=fetch, - return_numpy=False) - out = np.array(outs[0]) - draw_bounding_box_on_image(image_path, out, confs_threshold, - data_args.label_list) - - -if __name__ == '__main__': - args = parser.parse_args() - print_arguments(args) - - # for pascalvoc - label_file = 'label_list' - train_list = 'trainval.txt' - val_list = 'test.txt' - dataset = 'pascalvoc' - - mean_BGR = [float(m) for m in args.mean_BGR.split(",")] - image_shape = [int(m) for m in args.image_shape.split(",")] - lr_epochs = [int(m) for m in args.lr_epochs.split(",")] - lr_rates = [float(m) for m in args.lr_decay_rates.split(",")] - train_parameters[dataset]['image_shape'] = image_shape - train_parameters[dataset]['batch_size'] = args.batch_size - train_parameters[dataset]['lr'] = args.learning_rate - train_parameters[dataset]['epoc_num'] = args.epoc_num - train_parameters[dataset]['ap_version'] = args.ap_version - train_parameters[dataset]['lr_epochs'] = lr_epochs - train_parameters[dataset]['lr_decay'] = lr_rates - - data_args = reader.Settings( - dataset=dataset, - data_dir=args.data_dir, - label_file=label_file, - resize_h=image_shape[1], - 
resize_w=image_shape[2], - mean_value=mean_BGR, - apply_distort=True, - apply_expand=True, - ap_version = args.ap_version) - if args.mode == 'train': - train(args, data_args, train_parameters[dataset], train_list, val_list) - elif args.mode == 'test': - eval(args, data_args, train_parameters[dataset], val_list) - else: - infer(args, data_args) diff --git a/PaddleCV/ssd/mobilenet_ssd.py b/PaddleCV/ssd/mobilenet_ssd.py deleted file mode 100644 index b8d11f544d35094d2db2aa73702044c9bf91ea54..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/mobilenet_ssd.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - - -class MobileNetSSD: - def __init__(self, img, num_classes, img_shape): - self.img = img - self.num_classes = num_classes - self.img_shape = img_shape - - def ssd_net(self, scale=1.0): - # 300x300 - tmp = self.conv_bn(self.img, 3, int(32 * scale), 2, 1) - # 150x150 - tmp = self.depthwise_separable(tmp, 32, 64, 32, 1, scale) - tmp = self.depthwise_separable(tmp, 64, 128, 64, 2, scale) - # 75x75 - tmp = self.depthwise_separable(tmp, 128, 128, 128, 1, scale) - tmp = self.depthwise_separable(tmp, 128, 256, 128, 2, scale) - # 38x38 - tmp = self.depthwise_separable(tmp, 256, 256, 256, 1, scale) - tmp = self.depthwise_separable(tmp, 256, 512, 256, 2, scale) - - # 19x19 - for i in range(5): - tmp = self.depthwise_separable(tmp, 512, 512, 512, 1, scale) - module11 = tmp - tmp = self.depthwise_separable(tmp, 512, 1024, 512, 2, scale) - - # 10x10 - module13 = self.depthwise_separable(tmp, 1024, 1024, 1024, 1, scale) - module14 = self.extra_block(module13, 256, 512, 1, 2) - # 5x5 - module15 = self.extra_block(module14, 128, 256, 1, 2) - # 3x3 - module16 = self.extra_block(module15, 128, 256, 1, 2) - # 2x2 - module17 = self.extra_block(module16, 64, 128, 1, 2) - - mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head( - inputs=[ - module11, module13, module14, module15, module16, module17 - ], - image=self.img, - num_classes=self.num_classes, - min_ratio=20, - max_ratio=90, - min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0], - max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0], - aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], - [2., 3.]], - base_size=self.img_shape[2], - offset=0.5, - flip=True) - - return mbox_locs, mbox_confs, box, box_var - - def conv_bn(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - act='relu', - use_cudnn=True): - parameter_attr = ParamAttr(learning_rate=0.1, 
initializer=MSRA()) - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=parameter_attr, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act) - - def depthwise_separable(self, input, num_filters1, num_filters2, num_groups, - stride, scale): - depthwise_conv = self.conv_bn( - input=input, - filter_size=3, - num_filters=int(num_filters1 * scale), - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False) - - pointwise_conv = self.conv_bn( - input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0) - return pointwise_conv - - def extra_block(self, input, num_filters1, num_filters2, num_groups, stride): - # 1x1 conv - pointwise_conv = self.conv_bn( - input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - num_groups=int(num_groups), - padding=0) - - # 3x3 conv - normal_conv = self.conv_bn( - input=pointwise_conv, - filter_size=3, - num_filters=int(num_filters2), - stride=2, - num_groups=int(num_groups), - padding=1) - return normal_conv - - -def build_mobilenet_ssd(img, num_classes, img_shape): - ssd_model = MobileNetSSD(img, num_classes, img_shape) - return ssd_model.ssd_net() diff --git a/PaddleCV/ssd/pretrained/download_coco.sh b/PaddleCV/ssd/pretrained/download_coco.sh deleted file mode 100755 index 1cd1836f7c6e32f9f308a0c9a29d10efbc6f183f..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/pretrained/download_coco.sh +++ /dev/null @@ -1,8 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_coco.tar.gz -echo "Extractint..." 
-tar -xf ssd_mobilenet_v1_coco.tar.gz diff --git a/PaddleCV/ssd/pretrained/download_imagenet.sh b/PaddleCV/ssd/pretrained/download_imagenet.sh deleted file mode 100755 index eb7c6767d9f9585342c2ba89a2f28f070d1351c2..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/pretrained/download_imagenet.sh +++ /dev/null @@ -1,8 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://paddlemodels.bj.bcebos.com/mobilenet_v1_imagenet.tar.gz -echo "Extractint..." -tar -xf mobilenet_v1_imagenet.tar.gz diff --git a/PaddleCV/ssd/reader.py b/PaddleCV/ssd/reader.py deleted file mode 100644 index 02c7187c12c2e1ab338e977dd9d4984b023c3770..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/reader.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import xml.etree.ElementTree -import os -import time -import copy -import six -import math -import numpy as np -from PIL import Image -from PIL import ImageDraw -import image_util -import paddle - - -class Settings(object): - def __init__(self, - dataset=None, - data_dir=None, - label_file=None, - resize_h=300, - resize_w=300, - mean_value=[127.5, 127.5, 127.5], - apply_distort=True, - apply_expand=True, - ap_version='11point'): - self._dataset = dataset - self._ap_version = ap_version - self._data_dir = data_dir - if 'pascalvoc' in dataset: - self._label_list = [] - label_fpath = os.path.join(data_dir, label_file) - for line in open(label_fpath): - self._label_list.append(line.strip()) - - self._apply_distort = apply_distort - self._apply_expand = apply_expand - self._resize_height = resize_h - self._resize_width = resize_w - self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype( - 'float32') - self._expand_prob = 0.5 - self._expand_max_ratio = 4 - self._hue_prob = 0.5 - self._hue_delta = 18 - self._contrast_prob = 0.5 - self._contrast_delta = 0.5 - self._saturation_prob = 0.5 - self._saturation_delta = 0.5 - self._brightness_prob = 0.5 - self._brightness_delta = 0.125 - - @property - def dataset(self): - return self._dataset - - @property - def ap_version(self): - return self._ap_version - - @property - def apply_expand(self): - return self._apply_expand - - @property - def apply_distort(self): - return self._apply_distort - - @property - def data_dir(self): - return self._data_dir - - @data_dir.setter - def data_dir(self, data_dir): - self._data_dir = data_dir - - @property - def label_list(self): - return self._label_list - - @property - def resize_h(self): - return self._resize_height - - @property - def resize_w(self): - return self._resize_width - - @property - def img_mean(self): - return self._img_mean - - -def preprocess(img, bbox_labels, mode, settings): - img_width, img_height = img.size - sampled_labels = bbox_labels - if mode == 
'train': - if settings._apply_distort: - img = image_util.distort_image(img, settings) - if settings._apply_expand: - img, bbox_labels, img_width, img_height = image_util.expand_image( - img, bbox_labels, img_width, img_height, settings) - # sampling - batch_sampler = [] - # hard-code here - batch_sampler.append( - image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0)) - batch_sampler.append( - image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0)) - sampled_bbox = image_util.generate_batch_samples(batch_sampler, - bbox_labels) - - img = np.array(img) - if len(sampled_bbox) > 0: - idx = int(np.random.uniform(0, len(sampled_bbox))) - img, sampled_labels = image_util.crop_image( - img, bbox_labels, sampled_bbox[idx], img_width, img_height) - - img = Image.fromarray(img) - img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS) - img = np.array(img) - - if mode == 'train': - mirror = int(np.random.uniform(0, 2)) - if mirror == 1: - img = img[:, ::-1, :] - for i in six.moves.xrange(len(sampled_labels)): - tmp = sampled_labels[i][1] - sampled_labels[i][1] = 1 - sampled_labels[i][3] - sampled_labels[i][3] = 1 - tmp - # HWC to CHW - if len(img.shape) == 3: - img = np.swapaxes(img, 1, 2) - img = np.swapaxes(img, 1, 0) - # RBG to BGR - img = img[[2, 1, 0], :, :] - img = img.astype('float32') - img -= settings.img_mean - img = img * 0.007843 - return img, sampled_labels - - -def coco(settings, coco_api, file_list, mode, batch_size, shuffle, data_dir): - from pycocotools.coco import COCO - - def reader(): - if mode == 'train' and shuffle: - 
np.random.shuffle(file_list) - batch_out = [] - for image in file_list: - image_name = image['file_name'] - image_path = os.path.join(data_dir, image_name) - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." % image_path) - im = Image.open(image_path) - if im.mode == 'L': - im = im.convert('RGB') - im_width, im_height = im.size - im_id = image['id'] - - # layout: category_id | xmin | ymin | xmax | ymax | iscrowd - bbox_labels = [] - annIds = coco_api.getAnnIds(imgIds=image['id']) - anns = coco_api.loadAnns(annIds) - for ann in anns: - bbox_sample = [] - # start from 1, leave 0 to background - bbox_sample.append(float(ann['category_id'])) - bbox = ann['bbox'] - xmin, ymin, w, h = bbox - xmax = xmin + w - ymax = ymin + h - bbox_sample.append(float(xmin) / im_width) - bbox_sample.append(float(ymin) / im_height) - bbox_sample.append(float(xmax) / im_width) - bbox_sample.append(float(ymax) / im_height) - bbox_sample.append(float(ann['iscrowd'])) - bbox_labels.append(bbox_sample) - im, sample_labels = preprocess(im, bbox_labels, mode, settings) - sample_labels = np.array(sample_labels) - if len(sample_labels) == 0: continue - im = im.astype('float32') - boxes = sample_labels[:, 1:5] - lbls = sample_labels[:, 0].astype('int32') - iscrowd = sample_labels[:, -1].astype('int32') - if 'cocoMAP' in settings.ap_version: - batch_out.append((im, boxes, lbls, iscrowd, - [im_id, im_width, im_height])) - else: - batch_out.append((im, boxes, lbls, iscrowd)) - - if len(batch_out) == batch_size: - yield batch_out - batch_out = [] - - if mode == 'test' and len(batch_out) > 1: - yield batch_out - batch_out = [] - - return reader - - -def pascalvoc(settings, file_list, mode, batch_size, shuffle): - def reader(): - if mode == 'train' and shuffle: - np.random.shuffle(file_list) - batch_out = [] - cnt = 0 - for image in file_list: - image_path, label_path = image.split() - image_path = os.path.join(settings.data_dir, 
image_path) - label_path = os.path.join(settings.data_dir, label_path) - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." % image_path) - im = Image.open(image_path) - if im.mode == 'L': - im = im.convert('RGB') - im_width, im_height = im.size - - # layout: label | xmin | ymin | xmax | ymax | difficult - bbox_labels = [] - root = xml.etree.ElementTree.parse(label_path).getroot() - for object in root.findall('object'): - bbox_sample = [] - # start from 1 - bbox_sample.append( - float(settings.label_list.index(object.find('name').text))) - bbox = object.find('bndbox') - difficult = float(object.find('difficult').text) - bbox_sample.append(float(bbox.find('xmin').text) / im_width) - bbox_sample.append(float(bbox.find('ymin').text) / im_height) - bbox_sample.append(float(bbox.find('xmax').text) / im_width) - bbox_sample.append(float(bbox.find('ymax').text) / im_height) - bbox_sample.append(difficult) - bbox_labels.append(bbox_sample) - im, sample_labels = preprocess(im, bbox_labels, mode, settings) - sample_labels = np.array(sample_labels) - if len(sample_labels) == 0: continue - im = im.astype('float32') - boxes = sample_labels[:, 1:5] - lbls = sample_labels[:, 0].astype('int32') - difficults = sample_labels[:, -1].astype('int32') - - batch_out.append((im, boxes, lbls, difficults)) - if len(batch_out) == batch_size: - yield batch_out - cnt += len(batch_out) - batch_out = [] - - if mode == 'test' and len(batch_out) > 1: - yield batch_out - cnt += len(batch_out) - batch_out = [] - - return reader - - -def train(settings, - file_list, - batch_size, - shuffle=True, - use_multiprocess=True, - num_workers=8, - enable_ce=False): - file_path = os.path.join(settings.data_dir, file_list) - readers = [] - if 'coco' in settings.dataset: - # cocoapi - from pycocotools.coco import COCO - coco_api = COCO(file_path) - image_ids = coco_api.getImgIds() - images = coco_api.loadImgs(image_ids) - 
np.random.shuffle(images) - if '2014' in file_list: - sub_dir = "train2014" - elif '2017' in file_list: - sub_dir = "train2017" - data_dir = os.path.join(settings.data_dir, sub_dir) - - n = int(math.ceil(len(images) // num_workers)) if use_multiprocess \ - else len(images) - image_lists = [images[i:i + n] for i in range(0, len(images), n)] - for l in image_lists: - readers.append( - coco(settings, coco_api, l, 'train', batch_size, shuffle, - data_dir)) - else: - images = [line.strip() for line in open(file_path)] - np.random.shuffle(images) - n = int(math.ceil(len(images) // num_workers)) if use_multiprocess \ - else len(images) - image_lists = [images[i:i + n] for i in range(0, len(images), n)] - for l in image_lists: - readers.append(pascalvoc(settings, l, 'train', batch_size, shuffle)) - print("use_multiprocess ", use_multiprocess) - if use_multiprocess: - return paddle.reader.multiprocess_reader(readers, False) - else: - return readers[0] - - -def test(settings, file_list, batch_size): - file_list = os.path.join(settings.data_dir, file_list) - if 'coco' in settings.dataset: - from pycocotools.coco import COCO - coco_api = COCO(file_list) - image_ids = coco_api.getImgIds() - images = coco_api.loadImgs(image_ids) - if '2014' in file_list: - sub_dir = "val2014" - elif '2017' in file_list: - sub_dir = "val2017" - data_dir = os.path.join(settings.data_dir, sub_dir) - return coco(settings, coco_api, images, 'test', batch_size, False, - data_dir) - else: - image_list = [line.strip() for line in open(file_list)] - return pascalvoc(settings, image_list, 'test', batch_size, False) - - -def infer(settings, image_path): - def reader(): - if not os.path.exists(image_path): - raise ValueError("%s is not exist, you should specify " - "data path correctly." 
% image_path) - img = Image.open(image_path) - if img.mode == 'L': - img = img.convert('RGB') - im_width, im_height = img.size - img = img.resize((settings.resize_w, settings.resize_h), - Image.ANTIALIAS) - img = np.array(img) - # HWC to CHW - if len(img.shape) == 3: - img = np.swapaxes(img, 1, 2) - img = np.swapaxes(img, 1, 0) - # RBG to BGR - img = img[[2, 1, 0], :, :] - img = img.astype('float32') - img -= settings.img_mean - img = img * 0.007843 - return img - - return reader diff --git a/PaddleCV/ssd/train.py b/PaddleCV/ssd/train.py deleted file mode 100644 index 7bbb0fdaf6c80dbc5902690fdac916c622903c0e..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/train.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import time -import numpy as np -import argparse -import functools -import shutil -import math -import multiprocessing - - -def set_paddle_flags(**kwargs): - for key, value in kwargs.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -# NOTE(paddle-dev): All of these flags should be -# set before `import paddle`. Otherwise, it would -# not take any effect. 
-set_paddle_flags( - FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory -) - -import paddle -import paddle.fluid as fluid -import reader -from mobilenet_ssd import build_mobilenet_ssd -from utility import add_arguments, print_arguments, check_cuda - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) -# yapf: disable -add_arg('learning_rate', float, 0.001, "Learning rate.") -add_arg('batch_size', int, 64, "Minibatch size of all devices.") -add_arg('epoc_num', int, 120, "Epoch number.") -add_arg('use_gpu', bool, True, "Whether use GPU.") -add_arg('parallel', bool, True, "Whether train in parallel on multi-devices.") -add_arg('dataset', str, 'pascalvoc', "dataset can be coco2014, coco2017, and pascalvoc.") -add_arg('model_save_dir', str, 'model', "The path to save model.") -add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.") -add_arg('ap_version', str, '11point', "mAP version can be integral or 11point.") -add_arg('image_shape', str, '3,300,300', "Input image shape.") -add_arg('mean_BGR', str, '127.5,127.5,127.5', "Mean value for B,G,R channel which will be subtracted.") -add_arg('data_dir', str, 'data/pascalvoc', "Data directory.") -add_arg('use_multiprocess', bool, True, "Whether use multi-process for data preprocessing.") -add_arg('enable_ce', bool, False, "Whether use CE to evaluate the model.") -#yapf: enable - -train_parameters = { - "pascalvoc": { - "train_images": 16551, - "image_shape": [3, 300, 300], - "class_num": 21, - "batch_size": 64, - "lr": 0.001, - "lr_epochs": [40, 60, 80, 100], - "lr_decay": [1, 0.5, 0.25, 0.1, 0.01], - "ap_version": '11point', - }, - "coco2014": { - "train_images": 82783, - "image_shape": [3, 300, 300], - "class_num": 91, - "batch_size": 64, - "lr": 0.001, - "lr_epochs": [12, 19], - "lr_decay": [1, 0.5, 0.25], - "ap_version": 'integral', # should use eval_coco_map.py to test model - }, - "coco2017": { - 
"train_images": 118287, - "image_shape": [3, 300, 300], - "class_num": 91, - "batch_size": 64, - "lr": 0.001, - "lr_epochs": [12, 19], - "lr_decay": [1, 0.5, 0.25], - "ap_version": 'integral', # should use eval_coco_map.py to test model - } -} - -def optimizer_setting(train_params): - batch_size = train_params["batch_size"] - iters = train_params["train_images"] // batch_size - lr = train_params["lr"] - boundaries = [i * iters for i in train_params["lr_epochs"]] - values = [ i * lr for i in train_params["lr_decay"]] - - optimizer = fluid.optimizer.RMSProp( - learning_rate=fluid.layers.piecewise_decay(boundaries, values), - regularization=fluid.regularizer.L2Decay(0.00005), ) - - return optimizer - - -def build_program(main_prog, startup_prog, train_params, is_train): - image_shape = train_params['image_shape'] - class_num = train_params['class_num'] - ap_version = train_params['ap_version'] - outs = [] - with fluid.program_guard(main_prog, startup_prog): - py_reader = fluid.layers.py_reader( - capacity=64, - shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]], - lod_levels=[0, 1, 1, 1], - dtypes=["float32", "float32", "int32", "int32"], - use_double_buffer=True) - with fluid.unique_name.guard(): - image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader) - locs, confs, box, box_var = build_mobilenet_ssd(image, class_num, image_shape) - if is_train: - with fluid.unique_name.guard("train"): - loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, - box_var) - loss = fluid.layers.reduce_sum(loss) - optimizer = optimizer_setting(train_params) - optimizer.minimize(loss) - outs = [py_reader, loss] - else: - with fluid.unique_name.guard("inference"): - nmsed_out = fluid.layers.detection_output( - locs, confs, box, box_var, nms_threshold=0.45) - map_eval = fluid.metrics.DetectionMAP( - nmsed_out, - gt_label, - gt_box, - difficult, - class_num, - overlap_threshold=0.5, - evaluate_difficult=False, - ap_version=ap_version) - # nmsed_out and image is 
used to save mode for inference - outs = [py_reader, map_eval, nmsed_out, image] - return outs - - -def train(args, - data_args, - train_params, - train_file_list, - val_file_list): - - model_save_dir = args.model_save_dir - pretrained_model = args.pretrained_model - use_gpu = args.use_gpu - parallel = args.parallel - enable_ce = args.enable_ce - is_shuffle = True - - if not use_gpu: - devices_num = int(os.environ.get('CPU_NUM', - multiprocessing.cpu_count())) - else: - devices_num = fluid.core.get_cuda_device_count() - - batch_size = train_params['batch_size'] - epoc_num = train_params['epoc_num'] - batch_size_per_device = batch_size // devices_num - num_workers = 8 - - startup_prog = fluid.Program() - train_prog = fluid.Program() - test_prog = fluid.Program() - - if enable_ce: - import random - random.seed(0) - np.random.seed(0) - is_shuffle = False - startup_prog.random_seed = 111 - train_prog.random_seed = 111 - test_prog.random_seed = 111 - - train_py_reader, loss = build_program( - main_prog=train_prog, - startup_prog=startup_prog, - train_params=train_params, - is_train=True) - test_py_reader, map_eval, _, _ = build_program( - main_prog=test_prog, - startup_prog=startup_prog, - train_params=train_params, - is_train=False) - - test_prog = test_prog.clone(for_test=True) - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - if pretrained_model: - def if_exist(var): - return os.path.exists(os.path.join(pretrained_model, var.name)) - fluid.io.load_vars(exe, pretrained_model, main_program=train_prog, - predicate=if_exist) - - if parallel: - loss.persistable = True - build_strategy = fluid.BuildStrategy() - build_strategy.enable_inplace = True - train_exe = fluid.ParallelExecutor(main_program=train_prog, - use_cuda=use_gpu, loss_name=loss.name, build_strategy=build_strategy) - - test_reader = reader.test(data_args, val_file_list, batch_size) - test_py_reader.decorate_paddle_reader(test_reader) - - 
def save_model(postfix, main_prog): - model_path = os.path.join(model_save_dir, postfix) - if os.path.isdir(model_path): - shutil.rmtree(model_path) - print('save models to %s' % (model_path)) - fluid.io.save_persistables(exe, model_path, main_program=main_prog) - - best_map = 0. - test_map = None - def test(epoc_id, best_map): - _, accum_map = map_eval.get_map_var() - map_eval.reset(exe) - every_epoc_map=[] # for CE - test_py_reader.start() - try: - batch_id = 0 - while True: - test_map, = exe.run(test_prog, fetch_list=[accum_map]) - if batch_id % 10 == 0: - every_epoc_map.append(test_map) - print("Batch {0}, map {1}".format(batch_id, test_map)) - batch_id += 1 - except fluid.core.EOFException: - test_py_reader.reset() - mean_map = np.mean(every_epoc_map) - print("Epoc {0}, test map {1}".format(epoc_id, test_map[0])) - if test_map[0] > best_map: - best_map = test_map[0] - save_model('best_model', test_prog) - return best_map, mean_map - - - total_time = 0.0 - for epoc_id in range(epoc_num): - train_reader = reader.train(data_args, - train_file_list, - batch_size_per_device, - shuffle=is_shuffle, - use_multiprocess=args.use_multiprocess, - num_workers=num_workers, - enable_ce=enable_ce) - train_py_reader.decorate_paddle_reader(train_reader) - epoch_idx = epoc_id + 1 - start_time = time.time() - prev_start_time = start_time - every_epoc_loss = [] - batch_id = 0 - train_py_reader.start() - while True: - try: - prev_start_time = start_time - start_time = time.time() - if parallel: - loss_v, = train_exe.run(fetch_list=[loss.name]) - else: - loss_v, = exe.run(train_prog, fetch_list=[loss]) - loss_v = np.mean(np.array(loss_v)) - every_epoc_loss.append(loss_v) - if batch_id % 10 == 0: - print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format( - epoc_id, batch_id, loss_v, start_time - prev_start_time)) - batch_id += 1 - except (fluid.core.EOFException, StopIteration): - train_reader().close() - train_py_reader.reset() - break - - end_time = time.time() - total_time 
+= end_time - start_time - if epoc_id % 10 == 0 or epoc_id == epoc_num - 1: - best_map, mean_map = test(epoc_id, best_map) - print("Best test map {0}".format(best_map)) - # save model - save_model(str(epoc_id), train_prog) - - if enable_ce: - train_avg_loss = np.mean(every_epoc_loss) - if devices_num == 1: - print("kpis train_cost %s" % train_avg_loss) - print("kpis test_acc %s" % mean_map) - print("kpis train_speed %s" % (total_time / epoch_idx)) - else: - print("kpis train_cost_card%s %s" % - (devices_num, train_avg_loss)) - print("kpis test_acc_card%s %s" % - (devices_num, mean_map)) - print("kpis train_speed_card%s %f" % - (devices_num, total_time / epoch_idx)) - - -def main(): - args = parser.parse_args() - print_arguments(args) - - check_cuda(args.use_gpu) - - data_dir = args.data_dir - dataset = args.dataset - assert dataset in ['pascalvoc', 'coco2014', 'coco2017'] - - # for pascalvoc - label_file = 'label_list' - train_file_list = 'trainval.txt' - val_file_list = 'test.txt' - - if dataset == 'coco2014': - train_file_list = 'annotations/instances_train2014.json' - val_file_list = 'annotations/instances_val2014.json' - elif dataset == 'coco2017': - train_file_list = 'annotations/instances_train2017.json' - val_file_list = 'annotations/instances_val2017.json' - - mean_BGR = [float(m) for m in args.mean_BGR.split(",")] - image_shape = [int(m) for m in args.image_shape.split(",")] - train_parameters[dataset]['image_shape'] = image_shape - train_parameters[dataset]['batch_size'] = args.batch_size - train_parameters[dataset]['lr'] = args.learning_rate - train_parameters[dataset]['epoc_num'] = args.epoc_num - train_parameters[dataset]['ap_version'] = args.ap_version - - data_args = reader.Settings( - dataset=args.dataset, - data_dir=data_dir, - label_file=label_file, - resize_h=image_shape[1], - resize_w=image_shape[2], - mean_value=mean_BGR, - apply_distort=True, - apply_expand=True, - ap_version = args.ap_version) - train(args, - data_args, - 
train_parameters[dataset], - train_file_list=train_file_list, - val_file_list=val_file_list) - - -if __name__ == '__main__': - main() diff --git a/PaddleCV/ssd/utility.py b/PaddleCV/ssd/utility.py deleted file mode 100644 index 5fca8b592f54ff147305783b7d3bf0f29cc1fe41..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/utility.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Contains common utility functions.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import distutils.util -import sys -import six -import paddle.fluid as fluid - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. 
code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - -def check_cuda(use_cuda, err = \ - "\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \ - Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n" - ): - try: - if use_cuda == True and fluid.is_compiled_with_cuda() == False: - print(err) - sys.exit(1) - except Exception as e: - pass \ No newline at end of file diff --git a/PaddleCV/video/README.md b/PaddleCV/video/README.md deleted file mode 100644 index cfca0180a659a45d8604d5bd96ea252094ce6d1b..0000000000000000000000000000000000000000 --- a/PaddleCV/video/README.md +++ /dev/null @@ -1,2 +0,0 @@ - - 您好,该项目已被迁移,请移步到 [PaddleCV/PaddleVideo](../PaddleVideo) 目录下浏览本项目。 diff --git a/PaddleCV/video_classification/README.md b/PaddleCV/video_classification/README.md deleted file mode 100644 index cfca0180a659a45d8604d5bd96ea252094ce6d1b..0000000000000000000000000000000000000000 --- a/PaddleCV/video_classification/README.md +++ /dev/null @@ -1,2 +0,0 @@ - - 您好,该项目已被迁移,请移步到 [PaddleCV/PaddleVideo](../PaddleVideo) 目录下浏览本项目。 diff --git a/PaddleCV/yolov3/.gitignore b/PaddleCV/yolov3/.gitignore deleted file mode 100644 index 011d9771cdb71009709e4adedff2472e31f4a1b7..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -*.log -*.json -*.jpg -*.png -output/ -checkpoints/ -weights/ -!weights/*.sh -dataset/coco/ -!dataset/coco/*.py -log* -output* diff --git a/PaddleCV/yolov3/.run_ce.sh b/PaddleCV/yolov3/.run_ce.sh deleted file mode 100755 index 4ad4bd78d4cbf11adf58fa38c711798cbb49af46..0000000000000000000000000000000000000000 --- 
a/PaddleCV/yolov3/.run_ce.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -#This file is only used for continuous evaluation. -export CUDA_VISIBLE_DEVICES=0 -python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python train.py --enable_ce True --use_multiprocess False --snapshot_iter 100 --max_iter 200 | python _ce.py diff --git a/PaddleCV/yolov3/README.md b/PaddleCV/yolov3/README.md deleted file mode 100644 index abef7afd5fcb1c294cd0ad0f26c53cad518ff98d..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/README.md +++ /dev/null @@ -1,332 +0,0 @@ -**该项目已被迁移至[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), 这个项目包含了更多的检测模型。** - -# YOLOv3 目标检测 - ---- -## 内容 - -- [简介](#简介) -- [快速开始](#快速开始) -- [进阶使用](#进阶使用) -- [FAQ](#faq) -- [参考文献](#参考文献) -- [版本更新](#版本更新) -- [如何贡献代码](#如何贡献代码) -- [作者](#作者) - -## 简介 - -[YOLOv3](https://arxiv.org/abs/1804.02767) 是由 [Joseph Redmon](https://arxiv.org/search/cs?searchtype=author&query=Redmon%2C+J) 和 [Ali Farhadi](https://arxiv.org/search/cs?searchtype=author&query=Farhadi%2C+A) 提出的单阶段检测器, 该检测器与达到同样精度的传统目标检测方法相比,推断速度能达到接近两倍. - -在我们的实现版本中使用了 [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3) 中提出的图像增强和label smooth等优化方法,精度优于darknet框架的实现版本,在COCO-2017数据集上,我们达到`mAP(0.50:0.95)= 38.9`的精度,比darknet实现版本的精度(33.0)要高5.9. - -同时,在推断速度方面,基于Paddle预测库的加速方法,推断速度比darknet高30%. 
- -同时推荐用户参考[ IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122277) - -## 快速开始 - -### 安装 - -**安装[COCO-API](https://github.com/cocodataset/cocoapi):** - -训练前需要首先下载[COCO-API](https://github.com/cocodataset/cocoapi): - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # if cython is not installed - pip install Cython - # Install into global site-packages - make install - # Alternatively, if you do not have permissions or prefer - # not to install the COCO API into global site-packages - python2 setup.py install --user - -**安装[PaddlePaddle](https://github.com/PaddlePaddle/Paddle):** - -在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.5或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://paddlepaddle.org/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html)中的说明来更新PaddlePaddle。 - -### 数据准备 - -**COCO数据集:** - -在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - -```bash -python dataset/coco/download.py -``` - -数据目录结构如下: - -``` -dataset/coco/ -├── annotations -│   ├── instances_train2014.json -│   ├── instances_train2017.json -│   ├── instances_val2014.json -│   ├── instances_val2017.json -| ... -├── train2017 -│   ├── 000000000009.jpg -│   ├── 000000580008.jpg -| ... -├── val2017 -│   ├── 000000000139.jpg -│   ├── 000000000285.jpg -| ... 
- -``` - -**自定义数据集:** - -用户可使用自定义的数据集,我们推荐自定义数据集使用COCO数据集格式的标注,并可通过设置`--data_dir`或修改[reader.py](./reader.py#L39)指定数据集路径。使用COCO数据集格式标注时,目录结构可参考上述COCO数据集目录结构。 - -### 模型训练 - -**下载预训练模型:** 本示例提供DarkNet-53预训练[模型](https://paddlemodels.bj.bcebos.com/yolo/darknet53.tar.gz),该模型转换自作者提供的预训练权重[pjreddie/darknet](https://pjreddie.com/media/files/darknet53.conv.74),采用如下命令下载预训练模型: - - sh ./weights/download.sh - -**注意:** Windows用户可通过`./weights/download.sh`中的链接直接下载和解压。 - -通过设置`--pretrain` 加载预训练模型。同时在fine-tune时也采用该设置加载已训练模型。 -请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 - -**开始训练:** 数据准备完毕后,可以通过如下的方式启动训练: - - python train.py \ - --model_save_dir=output/ \ - --pretrain=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --class_num=${category_num} - -- 通过设置`export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`指定8卡GPU训练。 -- 若在Windows环境下训练模型,建议设置`--use_multiprocess_reader=False`。 -- 通过`--worker_num=`设置多进程数据读取器进程数,默认进程数为8,若训练机器CPU核数较少,建议设小该值。 -- 可选参数见: - - python train.py --help - -**注意:** YOLOv3模型总batch size为64,这里使用8 GPUs每GPU上batch size为8来训练 - -**模型设置:** - -* 模型使用了基于COCO数据集生成的9个先验框:10x13,16x30,33x23,30x61,62x45,59x119,116x90,156x198,373x326 -* YOLOv3模型中,若预测框不是该点最佳匹配框但是和任一ground truth框的重叠大于`ignore_thresh=0.7`,则忽略该预测框的目标性损失 - -**训练策略:** - -* 采用momentum优化算法训练YOLOv3,momentum=0.9。 -* 学习率采用warmup算法,前4000轮学习率从0.0线性增加至0.001。在400000,450000轮时使用0.1,0.01乘子进行学习率衰减,最大训练500000轮。 -* 通过设置`--syncbn=True`可以开启Synchronized batch normalization,该模式下精度会提高 - -**注意:** Synchronized batch normalization只能用于多GPU训练,不能用于CPU训练和单GPU训练。 - -下图为模型训练结果: -

-
-Train Loss -

- -### 模型评估 - -模型评估是指对训练完毕的模型评估各类性能指标。本示例采用[COCO官方评估](http://cocodataset.org/#detections-eval), 用户可通过如下方式下载Paddle发布的YOLOv3[模型](https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz) - - sh ./weights/download.sh - -`eval.py`是评估模块的主要执行程序,调用示例如下: - - python eval.py \ - --dataset=coco2017 \ - --weights=${path_to_weights} \ - --class_num=${category_num} - -- 通过设置`export CUDA_VISIBLE_DEVICES=0`指定单卡GPU评估。 - -若训练时指定`--syncbn=False`, 模型评估精度如下: - -| input size | mAP(IoU=0.50:0.95) | mAP(IoU=0.50) | mAP(IoU=0.75) | -| :------: | :------: | :------: | :------: | -| 608x608 | 37.7 | 59.8 | 40.8 | -| 416x416 | 36.5 | 58.2 | 39.1 | -| 320x320 | 34.1 | 55.4 | 36.3 | - -若训练时指定`--syncbn=True`, 模型评估精度如下: - -| input size | mAP(IoU=0.50:0.95) | mAP(IoU=0.50) | mAP(IoU=0.75) | -| :------: | :------: | :------: | :------: | -| 608x608 | 38.9 | 61.1 | 42.0 | -| 416x416 | 37.5 | 59.6 | 40.2 | -| 320x320 | 34.8 | 56.4 | 36.9 | - -- **注意:** 评估结果基于`pycocotools`评估器,没有滤除`score < 0.05`的预测框,其他框架有此滤除操作会导致精度下降。 - -### 模型推断及可视化 - -模型推断可以获取图像中的物体及其对应的类别,`infer.py`是主要执行程序,调用示例如下: - - python infer.py \ - --dataset=coco2017 \ - --weights=${path_to_weights} \ - --class_num=${category_num} \ - --image_path=data/COCO17/val2017/ \ - --image_name=000000000139.jpg \ - --draw_thresh=0.5 - -- 通过设置`export CUDA_VISIBLE_DEVICES=0`指定单卡GPU预测。 -- 推断结果显示如下,并会在`./output`目录下保存带预测框的图像 - -``` -Image person.jpg detect: - person at [190, 101, 273, 372] score: 0.98832 - dog at [63, 263, 200, 346] score: 0.97049 - horse at [404, 137, 598, 366] score: 0.97305 -Detect result save at ./output/person.png -``` - -下图为模型可视化预测结果: -

- - - -
-YOLOv3 预测可视化 -

- -### Benchmark - -模型训练benchmark: - -| 数据集 | GPU | CUDA | cuDNN | batch size | 训练速度(1 GPU) | 训练速度(8 GPU) | 显存占用(1 GPU) | 显存占用(8 GPU) | -| :-----: | :-: | :--: | :---: | :--------: | :-----------------: | :-----------------: | :------------: | :------------: | -| COCO | Tesla P40 | 8.0 | 7.1 | 8 (per GPU) | 30.2 images/s | 59.3 images/s | 10642 MB/GPU | 10782 MB/GPU | - -模型单卡推断速度: - -| GPU | CUDA | cuDNN | batch size | infer speed(608x608) | infer speed(416x416) | infer speed(320x320) | -| :-: | :--: | :---: | :--------: | :-----: | :-----: | :-----: | -| Tesla P40 | 8.0 | 7.1 | 1 | 48 ms/frame | 29 ms/frame |24 ms/frame | - -### 服务部署 - -进行YOLOv3的服务部署,用户可以在[eval.py](./eval.py#L54)或[infer.py](./infer.py#L47)中保存可部署的推断模型,该模型可以用Paddle预测库加载和部署,参考[Paddle预测库](http://paddlepaddle.org/documentation/docs/zh/1.4/advanced_usage/deploy/index_cn.html) - -## 进阶使用 - -### 背景介绍 - -传统目标检测方法通过两阶段检测,第一阶段生成预选框,第二阶段对预选框进行分类得到类别,而YOLO将目标检测看做是对框位置和类别概率的一个单阶段回归问题,使得YOLO能达到近两倍的检测速度。而YOLOv3在YOLO的基础上引入的多尺度预测,使得YOLOv3网络对于小物体的检测精度大幅提高。 - -### 模型概览 - -[YOLOv3](https://arxiv.org/abs/1804.02767) 是一阶段End2End的目标检测器。其目标检测原理如下图所示: -

-
-YOLOv3检测原理 -

- -### 模型结构 - -YOLOv3将输入图像分成S\*S个格子,每个格子预测B个bounding box,每个bounding box预测内容包括: Location(x, y, w, h)、Confidence Score和C个类别的概率,因此YOLOv3输出层的channel数为B\*(5 + C)。YOLOv3的loss函数也有三部分组成:Location误差,Confidence误差和分类误差。 - -YOLOv3的网络结构如下图所示: -

-
-YOLOv3网络结构 -

- -YOLOv3 的网络结构由基础特征提取网络、multi-scale特征融合层和输出层组成。 - -1. 特征提取网络。YOLOv3使用 [DarkNet53](https://arxiv.org/abs/1612.08242)作为特征提取网络:DarkNet53 基本采用了全卷积网络,用步长为2的卷积操作替代了池化层,同时添加了 Residual 单元,避免在网络层数过深时发生梯度弥散。 - -2. 特征融合层。为了解决之前YOLO版本对小目标不敏感的问题,YOLOv3采用了3个不同尺度的特征图来进行目标检测,分别为13\*13,26\*26,52\*52,用来检测大、中、小三种目标。特征融合层选取 DarkNet 产出的三种尺度特征图作为输入,借鉴了FPN(feature pyramid networks)的思想,通过一系列的卷积层和上采样对各尺度的特征图进行融合。 - -3. 输出层。同样使用了全卷积结构,其中最后一个卷积层的卷积核个数是255:3\*(80+4+1)=255,3表示一个grid cell包含3个bounding box,4表示框的4个坐标信息,1表示Confidence Score,80表示COCO数据集中80个类别的概率。 - -### 模型fine-tune - -对YOLOv3进行fine-tune,用户可用`--pretrain`指定下载好的Paddle发布的YOLOv3[模型](https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz),并把`--class_num`设置为用户数据集的类别数。 - -在fine-tune时,若用户自定义数据集的类别数不等于COCO数据集的80类,则加载权重时不应加载`yolo_output`层的权重,可通过在[train.py](./train.py#L76)使用如下方式加载非`yolo_output`层的权重: - -```python -if cfg.pretrain: - if not os.path.exists(cfg.pretrain): - print("Pretrain weights not found: {}".format(cfg.pretrain)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrain, var.name)) \ - and var.name.find('yolo_output') < 0 - - fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist) - -``` - -若用户自定义数据集的类别是COCO数据集类别的子集,`yolo_output`层的权重可以进行裁剪后导入。例如用户数据集有6类分别对应COCO数据集80类中的第`[3, 19, 25, 41, 58, 73]`类,可通过如下方式裁剪`yolo_output`层权重: - -```python -if cfg.pretrain: - if not os.path.exists(cfg.pretrain): - print("Pretrain weights not found: {}".format(cfg.pretrain)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrain, var.name)) - - fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist) - - cat_idxs = [3, 19, 25, 41, 58, 73] - # the first 5 channels is x, y, w, h, objectness, - # the following 80 channel is for 80 categories - channel_idxs = np.array(range(5) + [idx + 5 for idx in cat_idxs]) - # we have 3 yolo_output layers - for i in range(3): - # crop conv weights - weights_tensor = fluid.global_scope().find_var( - "yolo_output.{}.conv.weights".format(i)).get_tensor() - weights = 
np.array(weights_tensor) - # each yolo_output layer has 3 anchors, 85 channels of each anchor - weights = np.concatenate(weights[channel_idxs], - weights[85 + channel_idxs], - weights[170 + channel_idxs]) - weights_tensor.set(weights.astype('float32'), place) - # crop conv bias - bias_tensor = fluid.global_scope().find_var( - "yolo_output.{}.conv.bias".format(i)).get_tensor() - bias = np.array(bias_tensor) - bias = np.concatenate(bias[channel_idxs], - bias[85 + channel_idxs], - bias[170 + channel_idxs]) - bias_tensor.set(bias.astype('float32'), place) - -``` - -## FAQ - -**Q:** 我使用单GPU训练,训练过程中`loss=nan`,这是为什么? -**A:** YOLOv3中`learning_rate=0.001`的设置是针对总batch size为64的情况,若用户的batch size小于该值,建议调小学习率。 - -**Q:** 我训练YOLOv3速度比较慢,要怎么提速? -**A:** YOLOv3的数据增强比较复杂,速度比较慢,可通过在[reader.py](./reader.py#L284)中增加数据读取的进程数来提速。若用户是进行fine-tune,也可将`--no_mixup_iter`设置大于`--max_iter`的值来禁用mixup提升速度。 - -**Q:** 我使用YOLOv3训练两个类别的数据集,训练`loss=nan`或推断结果不符合预期,这是为什么? -**A:** `--label_smooth`参数会把所有正例的目标值设置为`1-1/class_num`,负例的目标值设为`1/class_num`,当`class_num`较小时,这个操作影响过大,可能会出现`loss=nan`或者训练结果错误,类别数较小时建议设置`--label_smooth=False`。若使用Paddle Fluid v1.5及以上版本,我们在C++代码中对这种情况作了保护,设置`--label_smooth=True`也不会出现这些问题。 - -## 参考文献 - -- [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640v5), Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi. -- [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767v1), Joseph Redmon, Ali Farhadi. -- [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3), Zhi Zhang, Tong He, Hang Zhang, Zhongyue Zhang, Junyuan Xie, Mu Li. 
- -## 版本更新 - -- 1/2019, 新增YOLOv3模型。 -- 4/2019, 新增YOLOv3模型Synchronized batch normalization模式。 - -## 如何贡献代码 - -如果你可以修复某个issue或者增加一个新功能,欢迎给我们提交PR。如果对应的PR被接受了,我们将根据贡献的质量和难度进行打分(0-5分,越高越好)。如果你累计获得了10分,可以联系我们获得面试机会或者为你写推荐信。 - -## 作者 - -- [heavengate](https://github.com/heavengate) -- [tink2123](https://github.com/tink2123) diff --git a/PaddleCV/yolov3/README_en.md b/PaddleCV/yolov3/README_en.md deleted file mode 100644 index 0181250fcdf1836c45365afd2e5a97d3e086c484..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/README_en.md +++ /dev/null @@ -1,332 +0,0 @@ -**This model has been move to [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection), which includes more detection models.** - -# YOLOv3 Objective Detection - ---- -## Table of Contents - -- [Introduction](#introduction) -- [Quick Start](#quick-start) -- [Advanced Usage](#advanced-usage) -- [FAQ](#faq) -- [Reference](#reference) -- [Update](#update) -- [Contribute](#contribute) -- [Author](#author) - -## Introduction - -[YOLOv3](https://arxiv.org/abs/1804.02767) is an one-stage object detector proposed by [Joseph Redmon](https://arxiv.org/search/cs?searchtype=author&query=Redmon%2C+J) and [Ali Farhadi](https://arxiv.org/search/cs?searchtype=author&query=Farhadi%2C+A), which can be nearly twice faster in inference than the SOTA detector with same performance. - -We use many image augment and label smooth tricks from [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3) in our implement and produce a higher performance than darknet framework. We got `mAP(0.50:0.95)= 38.9` in COCO-2017 dataset, which is 5.9 higher than darknet(33.0) implement. - -With execution acceleration method in Paddle framework prediction library, inference speed of YOLOv3 in our impliment can be 30% faster than darknet framework. 
- -We also recommend users to take a look at the  [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122277) - -## Quick Start - -### Installation - -**Install [PaddlePaddle](https://github.com/PaddlePaddle/Paddle):** - -Running sample code in this directory requires PaddelPaddle Fluid v.1.5 and later. If the PaddlePaddle on your device is lower than this version, please follow the instructions in [installation document](http://www.paddlepaddle.org/documentation/docs/en/1.5/beginners_guide/install/index_en.html) and make an update. - -**Install the [COCO-API](https://github.com/cocodataset/cocoapi):** - -To train the model, COCO-API is needed. Installation is as follows: - - git clone https://github.com/cocodataset/cocoapi.git - cd cocoapi/PythonAPI - # if cython is not installed - pip install Cython - # Install into global site-packages - make install - # Alternatively, if you do not have permissions or prefer - # not to install the COCO API into global site-packages - python2 setup.py install --user - -### Data preparation - -**COCO dataset:** - -Train the model on [MS-COCO dataset](http://cocodataset.org/#download), we also provide download script as follows: - -```bash -python dataset/coco/download.py -``` - -The data catalog structure is as follows: - -``` - dataset/coco/ - ├── annotations - │   ├── instances_train2014.json - │   ├── instances_train2017.json - │   ├── instances_val2014.json - │   ├── instances_val2017.json - | ... - ├── train2017 - │   ├── 000000000009.jpg - │   ├── 000000580008.jpg - | ... - ├── val2017 - │   ├── 000000000139.jpg - │   ├── 000000000285.jpg - | ... - -``` - -**User defined dataset:** - -You can defined datasets by yourself, we recommend using annotations in COCO format, and you can set dataset directory by `--data_dir` or in [reader.py](./reader.py#L39). When using annotations in COCO format, you can reference the directory structure in COCO dataset above. 
- -### Training - -**Download the pre-trained model:** This sample provides a DarkNet-53 pre-trained [model](https://paddlemodels.bj.bcebos.com/yolo/darknet53.tar.gz), which is converted from [pjreddie/darknet](https://pjreddie.com/media/files/darknet53.conv.74). You can download the pre-trained model as follows: - - sh ./weights/download.sh - -**NOTE:** Windows users can download weights from links in `./weights/download.sh`. - -Set `--pretrain` to load the pre-trained model. This parameter is also used to load a trained model when fine-tuning. -Please make sure that the pre-trained model is downloaded and loaded correctly; otherwise, the loss may be NAN during training. - - -**Training:** After data preparation, you can start training with: - - python train.py \ - --model_save_dir=output/ \ - --pretrain=${path_to_pretrain_model} \ - --data_dir=${path_to_data} \ - --class_num=${category_num} - -- Set `export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7` to specify 8 GPUs for training. -- It is recommended to set `--use_multiprocess_reader=False` when training on Windows. -- Set `--worker_num=` to specify the number of multiprocess reader workers, which defaults to 8. If the number of CPU cores in the training environment is small, it is recommended to set the worker number to a small value. -- For more help on arguments: - - python train.py --help - -**NOTE:** The total batch size for YOLOv3 is 64; we use 8 GPUs with a batch size of 8 on each GPU for training. - -**Model configuration:** - -* The model uses 9 anchors generated based on the COCO dataset, which are `10x13`, `16x30`, `33x23`, `30x61`, `62x45`, `59x119`, `116x90`, `156x198`, `373x326`. -* In YOLOv3, the objectness loss is ignored for predicted boxes that are not the best match but overlap a ground-truth box by more than `ignore_thresh=0.7`. - -**Training strategy:** - -* Use the momentum optimizer with momentum=0.9. -* In the first 4000 iterations, the learning rate increases linearly from 0.0 to 0.001. 
Then the learning rate is decayed at iterations 400000 and 450000 with multipliers 0.1 and 0.01, respectively. The maximum iteration is 500200. -* Synchronized batch normalization can be enabled by setting `--syncbn=True`, which can produce higher performance. - -**NOTE:** Synchronized batch normalization can only be used on multiple GPU devices; it cannot be used on CPU devices or on a single GPU device. - -Training losses are shown below: -

-
-Train Loss -

- -### Evaluation - -Evaluation measures the performance of a trained model. This sample provides `eval.py`, which uses the COCO-specific mAP metric defined by the [COCO committee](http://cocodataset.org/#detections-eval). You can also download the YOLOv3 [model](https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz) released by Paddle as follows: - - sh ./weights/download.sh - -`eval.py` is the main executor for evaluation; you can start evaluation with: - - python eval.py \ - --dataset=coco2017 \ - --weights=${path_to_weights} \ - --class_num=${category_num} - -- Set `export CUDA_VISIBLE_DEVICES=0` to specify one GPU for evaluation. - -If trained with `--syncbn=False`, the evaluation results are shown below: - -| input size | mAP(IoU=0.50:0.95) | mAP(IoU=0.50) | mAP(IoU=0.75) | -| :------: | :------: | :------: | :------: | -| 608x608 | 37.7 | 59.8 | 40.8 | -| 416x416 | 36.5 | 58.2 | 39.1 | -| 320x320 | 34.1 | 55.4 | 36.3 | - -If trained with `--syncbn=True`, the evaluation results are shown below: - -| input size | mAP(IoU=0.50:0.95) | mAP(IoU=0.50) | mAP(IoU=0.75) | -| :------: | :------: | :------: | :------: | -| 608x608 | 38.9 | 61.1 | 42.0 | -| 416x416 | 37.5 | 59.6 | 40.2 | -| 320x320 | 34.8 | 56.4 | 36.9 | - -- **NOTE:** Evaluations are based on the `pycocotools` evaluator with a score threshold of 0.01, which is the same as darknet. Frameworks that evaluate with a score threshold of 0.05 will see a decrease in performance. - -### Inference and Visualization - -Inference is used to get prediction scores or image features based on trained models. `infer.py` is the main executor for inference; you can start inference with: - - python infer.py \ - --dataset=coco2017 \ - --weights=${path_to_weights} \ - --class_num=${category_num} \ - --image_path=data/COCO17/val2017/ \ - --image_name=000000000139.jpg \ - --draw_thresh=0.5 - -- Set `export CUDA_VISIBLE_DEVICES=0` to specify one GPU for inference. 
-- Inference results will be shown as follows, and images with detection boxes will be saved under `./output`. - -``` -Image person.jpg detect: - person at [190, 101, 273, 372] score: 0.98832 - dog at [63, 263, 200, 346] score: 0.97049 - horse at [404, 137, 598, 366] score: 0.97305 -Detect result save at ./output/person.png -``` - -Examples of visualized inference results are shown below: -

- - - -
-YOLOv3 Visualization Examples -

- -### Benchmark - -Training benchmark: - -| dataset | GPU | CUDA | cuDNN | batch size | train speed (1 GPU) | train speed (8 GPU) | memory (1 GPU) | memory (8 GPU) | -| :-----: | :-: | :--: | :---: | :--------: | :-----------------: | :-----------------: | :------------: | :------------: | -| COCO | Tesla P40 | 8.0 | 7.1 | 8 (per GPU) | 30.2 images/s | 59.3 images/s | 10642 MB/GPU | 10782 MB/GPU | - -Inference speed on single GPU: - -| GPU | CUDA | cuDNN | batch size | infer speed(608x608) | infer speed(416x416) | infer speed(320x320) | -| :-: | :--: | :---: | :--------: | :-----: | :-----: | :-----: | -| Tesla P40 | 8.0 | 7.1 | 1 | 48 ms/frame | 29 ms/frame |24 ms/frame | - -### Inference deployment - -For YOLOv3 inference deployment, you can save YOLOv3 inference model in [eval.py](./eval.py#L54) or [infer.py](./infer.py#L47), inference model can be loaded and deployed by Paddle prediction library, see [Paddle Inference Lib](http://www.paddlepaddle.org/documentation/docs/en/1.4/advanced_usage/deploy/index_en.html). - -## Advanced Usage - -### Background introduction - -Traditional object detection method works with two stages, it generates potential bounding boxes in the first stage and then run classifier on these proposed boxes in the second stage. YOLO reframes object detection as a single regression problem, detect bounding box coordinates and class probabilities in one stage, which can make YOLO networks inference faster than two-stage networks. YOLOv3 uses multi-scale prediction layers, which improves small target detection performance. - -### Model overview - -[YOLOv3](https://arxiv.org/abs/1804.02767) is a one stage end to end detector。The detection principle of YOLOv3 is as follow: -

-
-YOLOv3 detection principle -

- -### Model structure - -YOLOv3 divides the input image in to S\*S grids and predict B bounding boxes in each grid, predictions of boxes include Location(x, y, w, h), Confidence Score and probabilities of C classes, therefore YOLOv3 output layer has B\*(5 + C) channels. YOLOv3 loss consists of three parts: location loss, confidence loss and classification loss. -The bone network of YOLOv3 is darknet53, the structure of YOLOv3 is as follow: -

-
-YOLOv3 structure -

- -YOLOv3 networks are composed of base feature extraction network, multi-scale feature fusion layers, and output layers. - -1. Feature extraction network: YOLOv3 uses [DarkNet53](https://arxiv.org/abs/1612.08242) for feature extracion. Darknet53 uses a full convolution structure, replacing the pooling layer with a convolution operation with step size as 2, and adding residual block to avoid gradient dispersion when the number of network layers is too deep. - -2. Feature fusion layer. In order to solve the problem that the previous YOLO version is not sensitive to small objects, YOLOv3 uses three different scale feature maps for target detection, which are 13\*13, 26\*26, 52\*52, respectively, for detecting large, medium and small objects. The feature fusion layer selects the three scale feature maps produced by DarkNet as input, and draws on the idea of FPN (feature pyramid networks) to fuse the feature maps of each scale through a series of convolutional layers and upsampling. - -3. Output layer: The output layer also uses a full convolution structure. The number of convolution kernels in the last convolutional layer is 255:3\*(80+4+1)=255, and 3 indicates that a grid cell contains 3 bounding boxes. 4 represents the four coordinate information of the box, 1 represents the Confidence Score, and 80 represents the probability of 80 categories in the COCO dataset. - -### Model fine-tune - -For YOLOv3 fine-tuning, you should set `--pretrain` as YOLOv3 [model](https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz) you download, set `--class_num` as category number in your dataset. 
In fine-tuning, the weights of the `yolo_output` layers should not be loaded when your `--class_num` differs from the 80 categories of the COCO dataset. You can load the pre-trained weights in [train.py](./train.py#L76) without the `yolo_output` layers as follows: - -```python -if cfg.pretrain: - if not os.path.exists(cfg.pretrain): - print("Pretrain weights not found: {}".format(cfg.pretrain)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrain, var.name)) \ - and var.name.find('yolo_output') < 0 - - fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist) - -``` - -If the categories in your dataset are a subset of the COCO categories, the weights of the `yolo_output` layers can be cropped for fine-tuning. Suppose you have 6 categories, which are the `[3, 19, 25, 41, 58, 73]`th of the 80 COCO categories; the weights can be cropped as follows: - -```python -if cfg.pretrain: - if not os.path.exists(cfg.pretrain): - print("Pretrain weights not found: {}".format(cfg.pretrain)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrain, var.name)) - - fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist) - - cat_idxs = [3, 19, 25, 41, 58, 73] - # the first 5 channels is x, y, w, h, objectness, - # the following 80 channel is for 80 categories - channel_idxs = np.array(range(5) + [idx + 5 for idx in cat_idxs]) - # we have 3 yolo_output layers - for i in range(3): - # crop conv weights - weights_tensor = fluid.global_scope().find_var( - "yolo_output.{}.conv.weights".format(i)).get_tensor() - weights = np.array(weights_tensor) - # each yolo_output layer has 3 anchors, 85 channels of each anchor - weights = np.concatenate(weights[channel_idxs], - weights[85 + channel_idxs], - weights[170 + channel_idxs]) - weights_tensor.set(weights.astype('float32'), place) - # crop conv bias - bias_tensor = fluid.global_scope().find_var( - "yolo_output.{}.conv.bias".format(i)).get_tensor() - bias = np.array(bias_tensor) - bias = np.concatenate(bias[channel_idxs], - bias[85 + channel_idxs], - bias[170 + channel_idxs]) - 
bias_tensor.set(bias.astype('float32'), place) - -``` - -## FAQ - -**Q:** I train YOLOv3 in single GPU and got `loss=nan`, why? -**A:** `learning_rate=0.001` configuration is for training in 8 GPUs while total batch size is 64, if you train with smaller batch size, please decrease the learning rate. - -**Q:** YOLOv3 training in my machine is very slow, how can I speed it up? -**A:** Image augmentation is very complicated and time consuming in YOLOv3, you can set more workers for reader in [reader.py](./reader.py#L284) for speeding up. If you are fine-tuning, you can also set `--no_mixup_iter` greater than `--max_iter` to disable image mixup. - -**Q:** YOLOv3 training with 2 categories dataset got `loss=nan` or wrong prediction, why? -**A:** Settting `--label_smooth` will smooth target value of positive sample to `1-1/class_num` and target of negative sample to `1/class_num`, if `class_num` is very small, label smooth will incur excessive influence and may cause `loss=nan` or wrong prediction, it is recommend to set `--label_smooth=False` while category number is small. If you are using Paddle Fluid v1.5 and above, this situation is protected in C++ code, setting `--label_smooth=True` will no longer incur such error. - -## Reference - -- [You Only Look Once: Unified, Real-Time Object Detection](https://arxiv.org/abs/1506.02640v5), Joseph Redmon, Santosh Divvala, Ross Girshick, Ali Farhadi. -- [YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767v1), Joseph Redmon, Ali Farhadi. -- [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3), Zhi Zhang, Tong He, Hang Zhang, Zhongyue Zhang, Junyuan Xie, Mu Li. - -## Update - -- 1/2019, Add YOLOv3 model. -- 4/2019, Add synchronized batch normalization for YOLOv3. - -## Contribute - -If you can fix a issue or add a new feature, please open a PR to us. 
If your PR is accepted, you can get scores according to the quality and difficulty of your PR(0~5), while you got 10 scores, you can contact us for interview or recommendation letter. - -## Author - -- [heavengate](https://github.com/heavengate) -- [tink2123](https://github.com/tink2123) diff --git a/PaddleCV/yolov3/_ce.py b/PaddleCV/yolov3/_ce.py deleted file mode 100644 index c0ce52dfd811753bf025babca764eaf232ab246b..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/_ce.py +++ /dev/null @@ -1,48 +0,0 @@ -### This file is only used for continuous evaluation test! -from __future__ import print_function -from __future__ import division -from __future__ import absolute_import -import os -import sys -sys.path.append(os.environ['ceroot']) -from kpi import CostKpi -from kpi import DurationKpi - -train_cost_1card_kpi = CostKpi( - 'train_cost_1card', 0.02, 0, actived=True, desc='train cost') -train_duration_1card_kpi = DurationKpi( - 'train_duration_1card', 0.1, 0, actived=True, desc='train duration') -train_cost_8card_kpi = CostKpi( - 'train_cost_8card', 0.02, 0, actived=True, desc='train cost') -train_duration_8card_kpi = DurationKpi( - 'train_duration_8card', 0.1, 0, actived=True, desc='train duration') -tracking_kpis = [ - train_cost_1card_kpi, train_duration_1card_kpi, train_cost_8card_kpi, - train_duration_8card_kpi -] - - -def parse_log(log): - for line in log.split('\n'): - fs = line.strip().split('\t') - print(fs) - if len(fs) == 3 and fs[0] == 'kpis': - print("-----%s" % fs) - kpi_name = fs[1] - kpi_value = float(fs[2]) - yield kpi_name, kpi_value - - -def log_to_ce(log): - kpi_tracker = {} - for kpi in tracking_kpis: - kpi_tracker[kpi.name] = kpi - for (kpi_name, kpi_value) in parse_log(log): - print(kpi_name, kpi_value) - kpi_tracker[kpi_name].add_record(kpi_value) - kpi_tracker[kpi_name].persist() - - -if __name__ == '__main__': - log = sys.stdin.read() - log_to_ce(log) diff --git a/PaddleCV/yolov3/box_utils.py b/PaddleCV/yolov3/box_utils.py 
deleted file mode 100644 index b5bc42502eb2c79f36be624751170d71575a7820..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/box_utils.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np - -import matplotlib -matplotlib.use('Agg') -from matplotlib import pyplot as plt -from PIL import Image - - -def coco_anno_box_to_center_relative(box, img_height, img_width): - """ - Convert COCO annotations box with format [x1, y1, w, h] to - center mode [center_x, center_y, w, h] and divide image width - and height to get relative value in range[0, 1] - """ - assert len(box) == 4, "box should be a len(4) list or tuple" - x, y, w, h = box - - x1 = max(x, 0) - x2 = min(x + w - 1, img_width - 1) - y1 = max(y, 0) - y2 = min(y + h - 1, img_height - 1) - - x = (x1 + x2) / 2 / img_width - y = (y1 + y2) / 2 / img_height - w = (x2 - x1) / img_width - h = (y2 - y1) / img_height - - return np.array([x, y, w, h]) - - -def clip_relative_box_in_image(x, y, w, h): - """Clip relative box coordinates x, y, w, h to [0, 1]""" - x1 = max(x - w / 2, 0.) - x2 = min(x + w / 2, 1.) - y1 = min(y - h / 2, 0.) - y2 = max(y + h / 2, 1.) 
- x = (x1 + x2) / 2 - y = (y1 + y2) / 2 - w = x2 - x1 - h = y2 - y1 - - -def box_xywh_to_xyxy(box): - shape = box.shape - assert shape[-1] == 4, "Box shape[-1] should be 4." - - box = box.reshape((-1, 4)) - box[:, 0], box[:, 2] = box[:, 0] - box[:, 2] / 2, box[:, 0] + box[:, 2] / 2 - box[:, 1], box[:, 3] = box[:, 1] - box[:, 3] / 2, box[:, 1] + box[:, 3] / 2 - box = box.reshape(shape) - return box - - -def box_iou_xywh(box1, box2): - assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4." - assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4." - - b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 - b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 - b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 - b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 - - inter_x1 = np.maximum(b1_x1, b2_x1) - inter_x2 = np.minimum(b1_x2, b2_x2) - inter_y1 = np.maximum(b1_y1, b2_y1) - inter_y2 = np.minimum(b1_y2, b2_y2) - inter_w = inter_x2 - inter_x1 + 1 - inter_h = inter_y2 - inter_y1 + 1 - inter_w[inter_w < 0] = 0 - inter_h[inter_h < 0] = 0 - - inter_area = inter_w * inter_h - b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1) - b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1) - - return inter_area / (b1_area + b2_area - inter_area) - - -def box_iou_xyxy(box1, box2): - assert box1.shape[-1] == 4, "Box1 shape[-1] should be 4." - assert box2.shape[-1] == 4, "Box2 shape[-1] should be 4." 
- - b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] - b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] - - inter_x1 = np.maximum(b1_x1, b2_x1) - inter_x2 = np.minimum(b1_x2, b2_x2) - inter_y1 = np.maximum(b1_y1, b2_y1) - inter_y2 = np.minimum(b1_y2, b2_y2) - inter_w = inter_x2 - inter_x1 - inter_h = inter_y2 - inter_y1 - inter_w[inter_w < 0] = 0 - inter_h[inter_h < 0] = 0 - - inter_area = inter_w * inter_h - b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) - b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - - return inter_area / (b1_area + b2_area - inter_area) - - -def box_crop(boxes, labels, scores, crop, img_shape): - x, y, w, h = map(float, crop) - im_w, im_h = map(float, img_shape) - - boxes = boxes.copy() - boxes[:, 0], boxes[:, 2] = (boxes[:, 0] - boxes[:, 2] / 2) * im_w, ( - boxes[:, 0] + boxes[:, 2] / 2) * im_w - boxes[:, 1], boxes[:, 3] = (boxes[:, 1] - boxes[:, 3] / 2) * im_h, ( - boxes[:, 1] + boxes[:, 3] / 2) * im_h - - crop_box = np.array([x, y, x + w, y + h]) - centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 - mask = np.logical_and(crop_box[:2] <= centers, centers <= crop_box[2:]).all( - axis=1) - - boxes[:, :2] = np.maximum(boxes[:, :2], crop_box[:2]) - boxes[:, 2:] = np.minimum(boxes[:, 2:], crop_box[2:]) - boxes[:, :2] -= crop_box[:2] - boxes[:, 2:] -= crop_box[:2] - - mask = np.logical_and(mask, (boxes[:, :2] < boxes[:, 2:]).all(axis=1)) - boxes = boxes * np.expand_dims(mask.astype('float32'), axis=1) - labels = labels * mask.astype('float32') - scores = scores * mask.astype('float32') - boxes[:, 0], boxes[:, 2] = (boxes[:, 0] + boxes[:, 2]) / 2 / w, ( - boxes[:, 2] - boxes[:, 0]) / w - boxes[:, 1], boxes[:, 3] = (boxes[:, 1] + boxes[:, 3]) / 2 / h, ( - boxes[:, 3] - boxes[:, 1]) / h - - return boxes, labels, scores, mask.sum() - - -def draw_boxes_on_image(image_path, - boxes, - scores, - labels, - label_names, - score_thresh=0.5): - image = np.array(Image.open(image_path)) - plt.figure() - _, ax = 
plt.subplots(1) - ax.imshow(image) - - image_name = image_path.split('/')[-1] - print("Image {} detect: ".format(image_name)) - colors = {} - for box, score, label in zip(boxes, scores, labels): - if score < score_thresh: - continue - if box[2] <= box[0] or box[3] <= box[1]: - continue - label = int(label) - if label not in colors: - colors[label] = plt.get_cmap('hsv')(label / len(label_names)) - x1, y1, x2, y2 = box[0], box[1], box[2], box[3] - rect = plt.Rectangle( - (x1, y1), - x2 - x1, - y2 - y1, - fill=False, - linewidth=2.0, - edgecolor=colors[label]) - ax.add_patch(rect) - ax.text( - x1, - y1, - '{} {:.4f}'.format(label_names[label], score), - verticalalignment='bottom', - horizontalalignment='left', - bbox={'facecolor': colors[label], - 'alpha': 0.5, - 'pad': 0}, - fontsize=8, - color='white') - print("\t {:15s} at {:25} score: {:.5f}".format(label_names[int( - label)], str(list(map(int, list(box)))), score)) - image_name = image_name.replace('jpg', 'png') - plt.axis('off') - plt.gca().xaxis.set_major_locator(plt.NullLocator()) - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - plt.savefig( - "./output/{}".format(image_name), bbox_inches='tight', pad_inches=0.0) - print("Detect result save at ./output/{}\n".format(image_name)) - plt.cla() - plt.close('all') diff --git a/PaddleCV/yolov3/config.py b/PaddleCV/yolov3/config.py deleted file mode 100644 index 784cffed0f50a978881ede200cd11edc51689cce..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/config.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from edict import AttrDict -import six -import numpy as np - -_C = AttrDict() -cfg = _C - -# -# Training options -# - -# Snapshot period -_C.snapshot_iter = 2000 - -# min valid area for gt boxes -_C.gt_min_area = -1 - -# max target box number in an image -_C.max_box_num = 50 - -# -# Training options -# - -# valid score threshold to include boxes -_C.valid_thresh = 0.005 - -# threshold vale for box non-max suppression -_C.nms_thresh = 0.45 - -# the number of top k boxes to perform nms -_C.nms_topk = 400 - -# the number of output boxes after nms -_C.nms_posk = 100 - -# score threshold for draw box in debug mode -_C.draw_thresh = 0.5 - -# -# Model options -# - -# pixel mean values -_C.pixel_means = [0.485, 0.456, 0.406] - -# pixel std values -_C.pixel_stds = [0.229, 0.224, 0.225] - -# anchors box weight and height -_C.anchors = [ - 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326 -] - -# anchor mask of each yolo layer -_C.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - -# IoU threshold to ignore objectness loss of pred box -_C.ignore_thresh = .7 - -# -# SOLVER options -# - -# batch size -_C.batch_size = 8 - -# derived learning rate the to get the final learning rate. -_C.learning_rate = 0.001 - -# maximum number of iterations -_C.max_iter = 500200 - -# warm up to learning rate -_C.warm_up_iter = 4000 -_C.warm_up_factor = 0. 
- -# lr steps_with_decay -_C.lr_steps = [400000, 450000] -_C.lr_gamma = 0.1 - -# L2 regularization hyperparameter -_C.weight_decay = 0.0005 - -# momentum with SGD -_C.momentum = 0.9 - -# -# ENV options -# - -# support both CPU and GPU -_C.use_gpu = True - -# Class number -_C.class_num = 80 - -# dataset path -_C.train_file_list = 'annotations/instances_train2017.json' -_C.train_data_dir = 'train2017' -_C.val_file_list = 'annotations/instances_val2017.json' -_C.val_data_dir = 'val2017' - - -def merge_cfg_from_args(args): - """Merge config keys, values in args into the global config.""" - for k, v in sorted(six.iteritems(vars(args))): - try: - value = eval(v) - except: - value = v - _C[k] = value diff --git a/PaddleCV/yolov3/data_utils.py b/PaddleCV/yolov3/data_utils.py deleted file mode 100644 index f5c5246346a7ef4b568bbb3f3681793d36c22749..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/data_utils.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py -""" - -import os -import sys -import signal -import time -import numpy as np -import threading -import multiprocessing -try: - import queue -except ImportError: - import Queue as queue - - -# handle terminate reader process, do not print stack frame -def _reader_quit(signum, frame): - print("Reader process exit.") - sys.exit() - -def _term_group(sig_num, frame): - print('pid {} terminated, terminate group ' - '{}...'.format(os.getpid(), os.getpgrp())) - os.killpg(os.getpgid(os.getpid()), signal.SIGKILL) - -signal.signal(signal.SIGTERM, _reader_quit) -signal.signal(signal.SIGINT, _term_group) - - -class GeneratorEnqueuer(object): - """ - Builds a queue out of a data generator. - - Args: - generator: a generator function which endlessly yields data - use_multiprocessing (bool): use multiprocessing if True, - otherwise use threading. - wait_time (float): time to sleep in-between calls to `put()`. 
- random_seed (int): Initial seed for workers, - will be incremented by one for each workers. - """ - - def __init__(self, - generator, - use_multiprocessing=False, - wait_time=0.05, - random_seed=None): - self.wait_time = wait_time - self._generator = generator - self._use_multiprocessing = use_multiprocessing - self._threads = [] - self._stop_event = None - self.queue = None - self._manager = None - self.seed = random_seed - - def start(self, workers=1, max_queue_size=10): - """ - Start worker threads which add data from the generator into the queue. - - Args: - workers (int): number of worker threads - max_queue_size (int): queue size - (when full, threads could block on `put()`) - """ - - def data_generator_task(): - """ - Data generator task. - """ - - def task(): - if (self.queue is not None and - self.queue.qsize() < max_queue_size): - generator_output = next(self._generator) - self.queue.put((generator_output)) - else: - time.sleep(self.wait_time) - - if not self._use_multiprocessing: - while not self._stop_event.is_set(): - with self.genlock: - try: - task() - except Exception: - self._stop_event.set() - break - else: - while not self._stop_event.is_set(): - try: - task() - except Exception: - self._stop_event.set() - break - - try: - if self._use_multiprocessing: - self._manager = multiprocessing.Manager() - self.queue = self._manager.Queue(maxsize=max_queue_size) - self._stop_event = multiprocessing.Event() - else: - self.genlock = threading.Lock() - self.queue = queue.Queue() - self._stop_event = threading.Event() - for _ in range(workers): - if self._use_multiprocessing: - # Reset random seed else all children processes - # share the same seed - np.random.seed(self.seed) - thread = multiprocessing.Process(target=data_generator_task) - thread.daemon = True - if self.seed is not None: - self.seed += 1 - else: - thread = threading.Thread(target=data_generator_task) - self._threads.append(thread) - thread.start() - except: - self.stop() - raise - - def 
is_running(self): - """ - Returns: - bool: Whether the worker theads are running. - """ - return self._stop_event is not None and not self._stop_event.is_set() - - def stop(self, timeout=None): - """ - Stops running threads and wait for them to exit, if necessary. - Should be called by the same thread which called `start()`. - - Args: - timeout(int|None): maximum time to wait on `thread.join()`. - """ - if self.is_running(): - self._stop_event.set() - for thread in self._threads: - if self._use_multiprocessing: - if thread.is_alive(): - thread.join(timeout) - else: - thread.join(timeout) - if self._manager: - self._manager.shutdown() - - self._threads = [] - self._stop_event = None - self.queue = None - - def get(self): - """ - Creates a generator to extract data from the queue. - Skip the data if it is `None`. - - # Yields - tuple of data in the queue. - """ - while self.is_running(): - if not self.queue.empty(): - inputs = self.queue.get() - if inputs is not None: - yield inputs - else: - time.sleep(self.wait_time) diff --git a/PaddleCV/yolov3/dataset/coco/download.py b/PaddleCV/yolov3/dataset/coco/download.py deleted file mode 100644 index 9df49bef6eab9d615e61e3cd429dcfdbeb5708ce..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/dataset/coco/download.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import os.path as osp -import sys -import zipfile -import logging - -from paddle.dataset.common import download - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -DATASETS = { - 'coco': [ - # coco2017 - ('http://images.cocodataset.org/zips/train2017.zip', - 'cced6f7f71b7629ddf16f17bbcfab6b2', ), - ('http://images.cocodataset.org/zips/val2017.zip', - '442b8da7639aecaf257c1dceb8ba8c80', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', - 'f4bbac642086de4f52a3fdda2de5fa2c', ), - # coco2014 - ('http://images.cocodataset.org/zips/train2014.zip', - '0da8c0bd3d6becc4dcb32757491aca88', ), - ('http://images.cocodataset.org/zips/val2014.zip', - 'a3d79f5ed8d289b7a7554ce06a5782b3', ), - ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', - '0a379cfc70b0e71301e0f377548639bd', ), - ], -} - - -def download_decompress_file(data_dir, url, md5): - logger.info("Downloading from {}".format(url)) - zip_file = download(url, data_dir, md5) - logger.info("Decompressing {}".format(zip_file)) - with zipfile.ZipFile(zip_file) as zf: - zf.extractall(path=data_dir) - os.remove(zip_file) - - -if __name__ == "__main__": - data_dir = osp.split(osp.realpath(sys.argv[0]))[0] - for name, infos in DATASETS.items(): - for info in infos: - download_decompress_file(data_dir, info[0], info[1]) - logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/yolov3/dist_utils.py b/PaddleCV/yolov3/dist_utils.py deleted file mode 100644 index 9fb03f1bd351a87d758eb84133ab25b25530e864..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/dist_utils.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import paddle.fluid as fluid - -def nccl2_prepare(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile(trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - -def prepare_for_multi_process(exe, build_strategy, train_prog): - # prepare for multi-process - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: return - print("PADDLE_TRAINERS_NUM", num_trainers) - print("PADDLE_TRAINER_ID", trainer_id) - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - # NOTE(zcd): use multi processes to train the model, - # and each process use one GPU card. - startup_prog = fluid.Program() - nccl2_prepare(trainer_id, startup_prog, train_prog) - # the startup_prog are run two times, but it doesn't matter. - exe.run(startup_prog) diff --git a/PaddleCV/yolov3/edict.py b/PaddleCV/yolov3/edict.py deleted file mode 100644 index 552ede8e4006b5d4e90dd85d566749fd624c26d1..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/edict.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - - def __getattr__(self, name): - if name in self.__dict__: - return self.__dict__[name] - elif name in self: - return self[name] - else: - raise AttributeError(name) - - def __setattr__(self, name, value): - if name in self.__dict__: - self.__dict__[name] = value - else: - self[name] = value diff --git a/PaddleCV/yolov3/eval.py b/PaddleCV/yolov3/eval.py deleted file mode 100644 index 3443ccfcbff186e2db51ce795bb1569160c81ddb..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/eval.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import io -import six -import time -import json -import numpy as np -import paddle -import paddle.fluid as fluid -import reader -from models.yolov3 import YOLOv3 -from utility import print_arguments, parse_args, check_gpu -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval, Params -from config import cfg - - -def eval(): - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if '2014' in cfg.dataset: - test_list = 'annotations/instances_val2014.json' - elif '2017' in cfg.dataset: - test_list = 'annotations/instances_val2017.json' - - if cfg.debug: - if not os.path.exists('output'): - os.mkdir('output') - - model = YOLOv3(is_train=False) - model.build_model() - outputs = model.get_pred() - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - # yapf: disable - if cfg.weights: - def if_exist(var): - return os.path.exists(os.path.join(cfg.weights, var.name)) - fluid.io.load_vars(exe, cfg.weights, predicate=if_exist) - # yapf: enable - - # you can save inference model by following code - # fluid.io.save_inference_model("./output/yolov3", - # feeded_var_names=['image', 'im_shape'], - # target_vars=outputs, - # executor=exe) - - input_size = cfg.input_size - test_reader = reader.test(input_size, 1) - label_names, label_ids = reader.get_label_infos() - if cfg.debug: - print("Load in labels {} with ids {}".format(label_names, label_ids)) - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - - def get_pred_result(boxes, scores, labels, im_id): - result = [] - for box, score, label in zip(boxes, scores, labels): - x1, y1, x2, y2 = box - w = x2 - x1 + 1 - h = y2 - y1 + 1 - bbox = [x1, y1, w, h] - - res = { - 'image_id': im_id, - 'category_id': label_ids[int(label)], - 'bbox': list(map(float, bbox)), - 'score': float(score) - } - 
result.append(res) - return result - - dts_res = [] - fetch_list = [outputs] - total_time = 0 - for batch_id, batch_data in enumerate(test_reader()): - start_time = time.time() - batch_outputs = exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(batch_data), - return_numpy=False, - use_program_cache=True) - lod = batch_outputs[0].lod()[0] - nmsed_boxes = np.array(batch_outputs[0]) - if nmsed_boxes.shape[1] != 6: - continue - for i in range(len(lod) - 1): - im_id = batch_data[i][1] - start = lod[i] - end = lod[i + 1] - if start == end: - continue - nmsed_box = nmsed_boxes[start:end, :] - labels = nmsed_box[:, 0] - scores = nmsed_box[:, 1] - boxes = nmsed_box[:, 2:6] - dts_res += get_pred_result(boxes, scores, labels, im_id) - - end_time = time.time() - print("batch id: {}, time: {}".format(batch_id, end_time - start_time)) - total_time += end_time - start_time - - with io.open("yolov3_result.json", 'w') as outfile: - encode_func = unicode if six.PY2 else str - outfile.write(encode_func(json.dumps(dts_res))) - print("start evaluate detection result with coco api") - coco = COCO(os.path.join(cfg.data_dir, test_list)) - cocoDt = coco.loadRes("yolov3_result.json") - cocoEval = COCOeval(coco, cocoDt, 'bbox') - cocoEval.evaluate() - cocoEval.accumulate() - cocoEval.summarize() - print("evaluate done.") - - print("Time per batch: {}".format(total_time / batch_id)) - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - eval() diff --git a/PaddleCV/yolov3/image/000000000139.png b/PaddleCV/yolov3/image/000000000139.png deleted file mode 100644 index a2e3d5d0cd9f6c05ecef83794486410949b53762..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/000000000139.png and /dev/null differ diff --git a/PaddleCV/yolov3/image/000000127517.png b/PaddleCV/yolov3/image/000000127517.png deleted file mode 100644 index ef04630142bccf1fe8be78f73c4000c02209f3e4..0000000000000000000000000000000000000000 Binary files 
a/PaddleCV/yolov3/image/000000127517.png and /dev/null differ diff --git a/PaddleCV/yolov3/image/000000203864.png b/PaddleCV/yolov3/image/000000203864.png deleted file mode 100644 index 8067fd8065c272f86952cd289418b4d3d1d44643..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/000000203864.png and /dev/null differ diff --git a/PaddleCV/yolov3/image/000000515077.png b/PaddleCV/yolov3/image/000000515077.png deleted file mode 100644 index 70bbbe6f640fad5394da02e217f52f6912ee3dd3..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/000000515077.png and /dev/null differ diff --git a/PaddleCV/yolov3/image/YOLOv3.jpg b/PaddleCV/yolov3/image/YOLOv3.jpg deleted file mode 100644 index 06b81f545247c1d542fd661f947eb0cf3edc480e..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/YOLOv3.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/YOLOv3_structure.jpg b/PaddleCV/yolov3/image/YOLOv3_structure.jpg deleted file mode 100644 index 51bd2d1733e2f78945d3e871cb5b649aad95d633..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/YOLOv3_structure.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/dog.jpg b/PaddleCV/yolov3/image/dog.jpg deleted file mode 100644 index 77b0381222eaed50867643f4166092c781e56d5b..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/dog.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/eagle.jpg b/PaddleCV/yolov3/image/eagle.jpg deleted file mode 100644 index 8b7509505b01a766bbf637dcbb1e2c5f24903ac5..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/eagle.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/giraffe.jpg b/PaddleCV/yolov3/image/giraffe.jpg deleted file mode 100644 index a93e8b88398d94a7454f201372317a9414344c7c..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/giraffe.jpg and /dev/null differ diff --git 
a/PaddleCV/yolov3/image/horses.jpg b/PaddleCV/yolov3/image/horses.jpg deleted file mode 100644 index 3a761f46ba08ed459af026b59f6b91b6fa597dd1..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/horses.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/kite.jpg b/PaddleCV/yolov3/image/kite.jpg deleted file mode 100644 index 9eb325ac5fc375cb2513380087dd713be9be19d8..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/kite.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/person.jpg b/PaddleCV/yolov3/image/person.jpg deleted file mode 100644 index 61d377fff94d48c365b0cf18edcd4de38b229465..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/person.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/scream.jpg b/PaddleCV/yolov3/image/scream.jpg deleted file mode 100644 index 43f2c36a8d4df72c4f8621b377944e05f6c1fa08..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/scream.jpg and /dev/null differ diff --git a/PaddleCV/yolov3/image/train_loss.png b/PaddleCV/yolov3/image/train_loss.png deleted file mode 100644 index f16728e95d781d996639a35b54a944e91af6b640..0000000000000000000000000000000000000000 Binary files a/PaddleCV/yolov3/image/train_loss.png and /dev/null differ diff --git a/PaddleCV/yolov3/image_utils.py b/PaddleCV/yolov3/image_utils.py deleted file mode 100644 index 16edd255c395fa814a7cf7041be0175d1bee8bb2..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/image_utils.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import cv2 -from PIL import Image, ImageEnhance -import random - -import box_utils - - -def random_distort(img): - def random_brightness(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, upper) - return ImageEnhance.Brightness(img).enhance(e) - - def random_contrast(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, upper) - return ImageEnhance.Contrast(img).enhance(e) - - def random_color(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, upper) - return ImageEnhance.Color(img).enhance(e) - - ops = [random_brightness, random_contrast, random_color] - np.random.shuffle(ops) - - img = Image.fromarray(img) - img = ops[0](img) - img = ops[1](img) - img = ops[2](img) - img = np.asarray(img) - - return img - - -def random_crop(img, - boxes, - labels, - scores, - scales=[0.3, 1.0], - max_ratio=2.0, - constraints=None, - max_trial=50): - if len(boxes) == 0: - return img, boxes - - if not constraints: - constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0), - (0.9, 1.0), (0.0, 1.0)] - - img = Image.fromarray(img) - w, h = img.size - crops = [(0, 0, w, h)] - for min_iou, max_iou in constraints: - for _ in range(max_trial): - scale = random.uniform(scales[0], scales[1]) - aspect_ratio = random.uniform(max(1 / max_ratio, scale * scale), \ - min(max_ratio, 1 / scale / scale)) - crop_h = int(h * scale / np.sqrt(aspect_ratio)) - crop_w = int(w * scale * 
np.sqrt(aspect_ratio)) - crop_x = random.randrange(w - crop_w) - crop_y = random.randrange(h - crop_h) - crop_box = np.array([[(crop_x + crop_w / 2.0) / w, - (crop_y + crop_h / 2.0) / h, - crop_w / float(w), crop_h / float(h)]]) - - iou = box_utils.box_iou_xywh(crop_box, boxes) - if min_iou <= iou.min() and max_iou >= iou.max(): - crops.append((crop_x, crop_y, crop_w, crop_h)) - break - - while crops: - crop = crops.pop(np.random.randint(0, len(crops))) - crop_boxes, crop_labels, crop_scores, box_num = \ - box_utils.box_crop(boxes, labels, scores, crop, (w, h)) - if box_num < 1: - continue - img = img.crop((crop[0], crop[1], crop[0] + crop[2], - crop[1] + crop[3])).resize(img.size, Image.LANCZOS) - img = np.asarray(img) - return img, crop_boxes, crop_labels, crop_scores - img = np.asarray(img) - return img, boxes, labels, scores - - -def random_flip(img, gtboxes, thresh=0.5): - if random.random() > thresh: - img = img[:, ::-1, :] - gtboxes[:, 0] = 1.0 - gtboxes[:, 0] - return img, gtboxes - - -def random_interp(img, size, interp=None): - interp_method = [ - cv2.INTER_NEAREST, - cv2.INTER_LINEAR, - cv2.INTER_AREA, - cv2.INTER_CUBIC, - cv2.INTER_LANCZOS4, - ] - if not interp or interp not in interp_method: - interp = interp_method[random.randint(0, len(interp_method) - 1)] - h, w, _ = img.shape - im_scale_x = size / float(w) - im_scale_y = size / float(h) - img = cv2.resize( - img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=interp) - return img - - -def random_expand(img, - gtboxes, - max_ratio=4., - fill=None, - keep_ratio=True, - thresh=0.5): - if random.random() > thresh: - return img, gtboxes - - if max_ratio < 1.0: - return img, gtboxes - - h, w, c = img.shape - ratio_x = random.uniform(1, max_ratio) - if keep_ratio: - ratio_y = ratio_x - else: - ratio_y = random.uniform(1, max_ratio) - oh = int(h * ratio_y) - ow = int(w * ratio_x) - off_x = random.randint(0, ow - w) - off_y = random.randint(0, oh - h) - - out_img = np.zeros((oh, ow, c)) - if fill 
and len(fill) == c: - for i in range(c): - out_img[:, :, i] = fill[i] * 255.0 - - out_img[off_y:off_y + h, off_x:off_x + w, :] = img - gtboxes[:, 0] = ((gtboxes[:, 0] * w) + off_x) / float(ow) - gtboxes[:, 1] = ((gtboxes[:, 1] * h) + off_y) / float(oh) - gtboxes[:, 2] = gtboxes[:, 2] / ratio_x - gtboxes[:, 3] = gtboxes[:, 3] / ratio_y - - return out_img.astype('uint8'), gtboxes - - -def shuffle_gtbox(gtbox, gtlabel, gtscore): - gt = np.concatenate( - [gtbox, gtlabel[:, np.newaxis], gtscore[:, np.newaxis]], axis=1) - idx = np.arange(gt.shape[0]) - np.random.shuffle(idx) - gt = gt[idx, :] - return gt[:, :4], gt[:, 4], gt[:, 5] - - -def image_mixup(img1, gtboxes1, gtlabels1, gtscores1, img2, gtboxes2, gtlabels2, - gtscores2): - factor = np.random.beta(1.5, 1.5) - factor = max(0.0, min(1.0, factor)) - if factor >= 1.0: - return img1, gtboxes1, gtlabels1 - if factor <= 0.0: - return img2, gtboxes2, gtlabels2 - gtscores1 = gtscores1 * factor - gtscores2 = gtscores2 * (1.0 - factor) - - h = max(img1.shape[0], img2.shape[0]) - w = max(img1.shape[1], img2.shape[1]) - img = np.zeros((h, w, img1.shape[2]), 'float32') - img[:img1.shape[0], :img1.shape[1], :] = img1.astype('float32') * factor - img[:img2.shape[0], :img2.shape[1], :] += \ - img2.astype('float32') * (1.0 - factor) - gtboxes = np.zeros_like(gtboxes1) - gtlabels = np.zeros_like(gtlabels1) - gtscores = np.zeros_like(gtscores1) - - gt_valid_mask1 = np.logical_and(gtboxes1[:, 2] > 0, gtboxes1[:, 3] > 0) - gtboxes1 = gtboxes1[gt_valid_mask1] - gtlabels1 = gtlabels1[gt_valid_mask1] - gtscores1 = gtscores1[gt_valid_mask1] - gtboxes1[:, 0] = gtboxes1[:, 0] * img1.shape[1] / w - gtboxes1[:, 1] = gtboxes1[:, 1] * img1.shape[0] / h - gtboxes1[:, 2] = gtboxes1[:, 2] * img1.shape[1] / w - gtboxes1[:, 3] = gtboxes1[:, 3] * img1.shape[0] / h - - gt_valid_mask2 = np.logical_and(gtboxes2[:, 2] > 0, gtboxes2[:, 3] > 0) - gtboxes2 = gtboxes2[gt_valid_mask2] - gtlabels2 = gtlabels2[gt_valid_mask2] - gtscores2 = 
gtscores2[gt_valid_mask2] - gtboxes2[:, 0] = gtboxes2[:, 0] * img2.shape[1] / w - gtboxes2[:, 1] = gtboxes2[:, 1] * img2.shape[0] / h - gtboxes2[:, 2] = gtboxes2[:, 2] * img2.shape[1] / w - gtboxes2[:, 3] = gtboxes2[:, 3] * img2.shape[0] / h - - gtboxes_all = np.concatenate((gtboxes1, gtboxes2), axis=0) - gtlabels_all = np.concatenate((gtlabels1, gtlabels2), axis=0) - gtscores_all = np.concatenate((gtscores1, gtscores2), axis=0) - gt_num = min(len(gtboxes), len(gtboxes_all)) - gtboxes[:gt_num] = gtboxes_all[:gt_num] - gtlabels[:gt_num] = gtlabels_all[:gt_num] - gtscores[:gt_num] = gtscores_all[:gt_num] - return img.astype('uint8'), gtboxes, gtlabels, gtscores - - -def image_augment(img, gtboxes, gtlabels, gtscores, size, means=None): - img = random_distort(img) - img, gtboxes = random_expand(img, gtboxes, fill=means) - img, gtboxes, gtlabels, gtscores = \ - random_crop(img, gtboxes, gtlabels, gtscores) - img = random_interp(img, size) - img, gtboxes = random_flip(img, gtboxes) - gtboxes, gtlabels, gtscores = shuffle_gtbox(gtboxes, gtlabels, gtscores) - - return img.astype('float32'), gtboxes.astype('float32'), \ - gtlabels.astype('int32'), gtscores.astype('float32') diff --git a/PaddleCV/yolov3/infer.py b/PaddleCV/yolov3/infer.py deleted file mode 100644 index e98e75020a931aa10e309dd306fe0558492d3f57..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/infer.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import numpy as np -import paddle -import paddle.fluid as fluid -import box_utils -import reader -from utility import print_arguments, parse_args, check_gpu -from models.yolov3 import YOLOv3 -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval, Params -from config import cfg - - -def infer(): - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if not os.path.exists('output'): - os.mkdir('output') - - model = YOLOv3(is_train=False) - model.build_model() - outputs = model.get_pred() - input_size = cfg.input_size - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - # yapf: disable - if cfg.weights: - def if_exist(var): - return os.path.exists(os.path.join(cfg.weights, var.name)) - fluid.io.load_vars(exe, cfg.weights, predicate=if_exist) - # yapf: enable - - # you can save inference model by following code - # fluid.io.save_inference_model("./output/yolov3", - # feeded_var_names=['image', 'im_shape'], - # target_vars=outputs, - # executor=exe) - - feeder = fluid.DataFeeder(place=place, feed_list=model.feeds()) - fetch_list = [outputs] - image_names = [] - if cfg.image_name is not None: - image_names.append(cfg.image_name) - else: - for image_name in os.listdir(cfg.image_path): - if image_name.split('.')[-1] in ['jpg', 'png']: - image_names.append(image_name) - for image_name in image_names: - infer_reader = reader.infer(input_size, - os.path.join(cfg.image_path, image_name)) - label_names, _ = reader.get_label_infos() - data = next(infer_reader()) - im_shape = data[0][2] - outputs = exe.run(fetch_list=[v.name for v in fetch_list], - feed=feeder.feed(data), - return_numpy=False, - use_program_cache=True) - bboxes = np.array(outputs[0]) - if bboxes.shape[1] != 6: - print("No object found in {}".format(image_name)) - continue - labels = 
bboxes[:, 0].astype('int32') - scores = bboxes[:, 1].astype('float32') - boxes = bboxes[:, 2:].astype('float32') - - path = os.path.join(cfg.image_path, image_name) - box_utils.draw_boxes_on_image(path, boxes, scores, labels, label_names, - cfg.draw_thresh) - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - infer() diff --git a/PaddleCV/yolov3/learning_rate.py b/PaddleCV/yolov3/learning_rate.py deleted file mode 100644 index d712832d31463cc054b99aa924bf9ca84f976634..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/learning_rate.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler -from paddle.fluid.layers import control_flow - - -def exponential_with_warmup_decay(learning_rate, boundaries, values, - warmup_iter, warmup_factor): - global_step = lr_scheduler._decay_step_counter() - - lr = fluid.layers.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - - warmup_iter_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(warmup_iter), force_cpu=True) - - with control_flow.Switch() as switch: - with switch.case(global_step < warmup_iter_var): - alpha = global_step / warmup_iter_var - factor = warmup_factor * (1 - alpha) + alpha - decayed_lr = learning_rate * factor - fluid.layers.assign(decayed_lr, lr) - - for i in range(len(boundaries)): - boundary_val = fluid.layers.fill_constant( - shape=[1], - dtype='float32', - value=float(boundaries[i]), - force_cpu=True) - value_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(values[i])) - with switch.case(global_step < boundary_val): - fluid.layers.assign(value_var, lr) - - last_value_var = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(values[len(values) - 1])) - with switch.default(): - fluid.layers.assign(last_value_var, lr) - - return lr diff --git a/PaddleCV/yolov3/models/__init__.py b/PaddleCV/yolov3/models/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/PaddleCV/yolov3/models/darknet.py b/PaddleCV/yolov3/models/darknet.py deleted file mode 100644 index 9b9b7dd6c47da0506bf4dcc1ad60a3b563ab9125..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/models/darknet.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Constant -from paddle.fluid.regularizer import L2Decay - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv1 = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02), - name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - out = fluid.layers.batch_norm( - input=conv1, - act=None, - is_test=is_test, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02), - regularizer=L2Decay(0.), - name=bn_name + '.scale'), - bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), - regularizer=L2Decay(0.), - name=bn_name + '.offset'), - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - -def downsample(input, - ch_out, - filter_size=3, - stride=2, - padding=1, - is_test=True, - name=None): - return conv_bn_layer( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - is_test=is_test, - name=name) - - -def basicblock(input, ch_out, is_test=True, name=None): - conv1 = conv_bn_layer( - input, ch_out, 1, 1, 0, 
is_test=is_test, name=name + ".0") - conv2 = conv_bn_layer( - conv1, ch_out * 2, 3, 1, 1, is_test=is_test, name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - -def layer_warp(block_func, input, ch_out, count, is_test=True, name=None): - res_out = block_func( - input, ch_out, is_test=is_test, name='{}.0'.format(name)) - for j in range(1, count): - res_out = block_func( - res_out, ch_out, is_test=is_test, name='{}.{}'.format(name, j)) - return res_out - - -DarkNet_cfg = {53: ([1, 2, 8, 8, 4], basicblock)} - - -def add_DarkNet53_conv_body(body_input, is_test=True): - stages, block_func = DarkNet_cfg[53] - stages = stages[0:5] - conv1 = conv_bn_layer( - body_input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name="yolo_input") - downsample_ = downsample( - conv1, - ch_out=conv1.shape[1] * 2, - is_test=is_test, - name="yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = layer_warp( - block_func, - downsample_, - 32 * (2**i), - stage, - is_test=is_test, - name="stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = downsample( - block, - ch_out=block.shape[1] * 2, - is_test=is_test, - name="stage.{}.downsample".format(i)) - return blocks[-1:-4:-1] diff --git a/PaddleCV/yolov3/models/yolov3.py b/PaddleCV/yolov3/models/yolov3.py deleted file mode 100644 index 0feb2b0e2dfa21b56838cc9f1300a63402054bd0..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/models/yolov3.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Constant -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -from config import cfg - -from .darknet import add_DarkNet53_conv_body -from .darknet import conv_bn_layer - - -def yolo_detection_block(input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2".format(channel) - conv = input - for j in range(2): - conv = conv_bn_layer( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = conv_bn_layer( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = conv_bn_layer( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = conv_bn_layer( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - -def upsample(input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - -class YOLOv3(object): - def __init__(self, is_train=True, use_random=True): - self.is_train = is_train - self.use_random = use_random - self.outputs = [] - self.losses = [] - self.downsample = 32 - - def build_input(self): - self.image_shape = [3, cfg.input_size, cfg.input_size] 
- if self.is_train: - self.py_reader = fluid.layers.py_reader( - capacity=64, - shapes=[[-1] + self.image_shape, [-1, cfg.max_box_num, 4], - [-1, cfg.max_box_num], [-1, cfg.max_box_num]], - lod_levels=[0, 0, 0, 0], - dtypes=['float32'] * 2 + ['int32'] + ['float32'], - use_double_buffer=True) - self.image, self.gtbox, self.gtlabel, self.gtscore = \ - fluid.layers.read_file(self.py_reader) - else: - self.image = fluid.layers.data( - name='image', shape=self.image_shape, dtype='float32') - self.im_shape = fluid.layers.data( - name="im_shape", shape=[2], dtype='int32') - self.im_id = fluid.layers.data( - name="im_id", shape=[1], dtype='int32') - - def feeds(self): - if not self.is_train: - return [self.image, self.im_id, self.im_shape] - return [self.image, self.gtbox, self.gtlabel, self.gtscore] - - def build_model(self): - self.build_input() - - self.outputs = [] - self.boxes = [] - self.scores = [] - - blocks = add_DarkNet53_conv_body(self.image, not self.is_train) - for i, block in enumerate(blocks): - if i > 0: - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = yolo_detection_block( - block, - channel=512 // (2**i), - is_test=(not self.is_train), - name="yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0., 0.02), - name="yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), - regularizer=L2Decay(0.), - name="yolo_output.{}.conv.bias".format(i))) - self.outputs.append(block_out) - - if i < len(blocks) - 1: - route = conv_bn_layer( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not self.is_train), - name="yolo_transition.{}".format(i)) - # upsample - route = 
upsample(route) - - for i, out in enumerate(self.outputs): - anchor_mask = cfg.anchor_masks[i] - - if self.is_train: - loss = fluid.layers.yolov3_loss( - x=out, - gt_box=self.gtbox, - gt_label=self.gtlabel, - gt_score=self.gtscore, - anchors=cfg.anchors, - anchor_mask=anchor_mask, - class_num=cfg.class_num, - ignore_thresh=cfg.ignore_thresh, - downsample_ratio=self.downsample, - use_label_smooth=bool(cfg.label_smooth), - name="yolo_loss" + str(i)) - self.losses.append(fluid.layers.reduce_mean(loss)) - else: - mask_anchors = [] - for m in anchor_mask: - mask_anchors.append(cfg.anchors[2 * m]) - mask_anchors.append(cfg.anchors[2 * m + 1]) - boxes, scores = fluid.layers.yolo_box( - x=out, - img_size=self.im_shape, - anchors=mask_anchors, - class_num=cfg.class_num, - conf_thresh=cfg.valid_thresh, - downsample_ratio=self.downsample, - name="yolo_box" + str(i)) - self.boxes.append(boxes) - self.scores.append( - fluid.layers.transpose( - scores, perm=[0, 2, 1])) - - self.downsample //= 2 - - def loss(self): - return sum(self.losses) - - def get_pred(self): - yolo_boxes = fluid.layers.concat(self.boxes, axis=1) - yolo_scores = fluid.layers.concat(self.scores, axis=2) - return fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=cfg.valid_thresh, - nms_top_k=cfg.nms_topk, - keep_top_k=cfg.nms_posk, - nms_threshold=cfg.nms_thresh, - background_label=-1, - name="multiclass_nms") diff --git a/PaddleCV/yolov3/reader.py b/PaddleCV/yolov3/reader.py deleted file mode 100644 index 92a7ac1a59b457076e0c165fb25ca2f30195e092..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/reader.py +++ /dev/null @@ -1,356 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import numpy as np -import os -import sys -import random -import time -import copy -import cv2 -import box_utils -import image_utils -from pycocotools.coco import COCO -from data_utils import GeneratorEnqueuer -from config import cfg -import paddle.fluid as fluid - -class DataSetReader(object): - """A class for parsing and read COCO dataset""" - - def __init__(self): - self.has_parsed_categpry = False - - def _parse_dataset_dir(self, mode): - if 'coco2014' in cfg.dataset: - cfg.train_file_list = 'annotations/instances_train2014.json' - cfg.train_data_dir = 'train2014' - cfg.val_file_list = 'annotations/instances_val2014.json' - cfg.val_data_dir = 'val2014' - elif 'coco2017' in cfg.dataset: - cfg.train_file_list = 'annotations/instances_train2017.json' - cfg.train_data_dir = 'train2017' - cfg.val_file_list = 'annotations/instances_val2017.json' - cfg.val_data_dir = 'val2017' - else: - raise NotImplementedError('Dataset {} not supported'.format( - cfg.dataset)) - - if mode == 'train': - cfg.train_file_list = os.path.join(cfg.data_dir, - cfg.train_file_list) - cfg.train_data_dir = os.path.join(cfg.data_dir, cfg.train_data_dir) - self.COCO = COCO(cfg.train_file_list) - self.img_dir = cfg.train_data_dir - elif mode == 'test' or mode == 'infer': - cfg.val_file_list = os.path.join(cfg.data_dir, cfg.val_file_list) - cfg.val_data_dir = os.path.join(cfg.data_dir, cfg.val_data_dir) - self.COCO = 
COCO(cfg.val_file_list) - self.img_dir = cfg.val_data_dir - - def _parse_dataset_catagory(self): - self.categories = self.COCO.loadCats(self.COCO.getCatIds()) - self.num_category = len(self.categories) - self.label_names = [] - self.label_ids = [] - for category in self.categories: - self.label_names.append(category['name']) - self.label_ids.append(int(category['id'])) - self.category_to_id_map = {v: i for i, v in enumerate(self.label_ids)} - print("Load in {} categories.".format(self.num_category)) - if self.num_category != cfg.class_num: - raise ValueError("category number({}) in your dataset is not equal " - "to --class_num={} settting, which may incur errors in " - "eval/infer or cause precision loss.".format( - self.num_category, cfg.class_num)) - self.has_parsed_categpry = True - - def get_label_infos(self): - if not self.has_parsed_categpry: - self._parse_dataset_dir("test") - self._parse_dataset_catagory() - return (self.label_names, self.label_ids) - - def _parse_gt_annotations(self, img): - img_height = img['height'] - img_width = img['width'] - anno = self.COCO.loadAnns( - self.COCO.getAnnIds( - imgIds=img['id'], iscrowd=None)) - gt_index = 0 - for target in anno: - if target['area'] < cfg.gt_min_area: - continue - if 'ignore' in target and target['ignore']: - continue - - box = box_utils.coco_anno_box_to_center_relative( - target['bbox'], img_height, img_width) - if box[2] <= 0 and box[3] <= 0: - continue - - img['gt_boxes'][gt_index] = box - img['gt_labels'][gt_index] = \ - self.category_to_id_map[target['category_id']] - gt_index += 1 - if gt_index >= cfg.max_box_num: - break - - def _parse_images(self, is_train): - image_ids = self.COCO.getImgIds() - image_ids.sort() - imgs = copy.deepcopy(self.COCO.loadImgs(image_ids)) - for img in imgs: - img['image'] = os.path.join(self.img_dir, img['file_name']) - assert os.path.exists(img['image']), \ - "image {} not found.".format(img['image']) - box_num = cfg.max_box_num - img['gt_boxes'] = 
np.zeros((cfg.max_box_num, 4), dtype=np.float32) - img['gt_labels'] = np.zeros((cfg.max_box_num), dtype=np.int32) - for k in ['date_captured', 'url', 'license', 'file_name']: - if k in img: - del img[k] - - if is_train: - self._parse_gt_annotations(img) - - print("Loaded {0} images from {1}.".format(len(imgs), cfg.dataset)) - - return imgs - - def _parse_images_by_mode(self, mode): - if mode == 'infer': - return [] - else: - return self._parse_images(is_train=(mode == 'train')) - - def get_reader(self, - mode, - size=416, - batch_size=None, - shuffle=False, - shuffle_seed=None, - mixup_iter=0, - random_sizes=[], - image=None): - assert mode in ['train', 'test', 'infer'], "Unknow mode type!" - if mode != 'infer': - assert batch_size is not None, \ - "batch size connot be None in mode {}".format(mode) - self._parse_dataset_dir(mode) - self._parse_dataset_catagory() - - def img_reader(img, size, mean, std): - im_path = img['image'] - im = cv2.imread(im_path).astype('float32') - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - h, w, _ = im.shape - im_scale_x = size / float(w) - im_scale_y = size / float(h) - out_img = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=cv2.INTER_CUBIC) - mean = np.array(mean).reshape((1, 1, -1)) - std = np.array(std).reshape((1, 1, -1)) - out_img = (out_img / 255.0 - mean) / std - out_img = out_img.transpose((2, 0, 1)) - - return (out_img, int(img['id']), (h, w)) - - def img_reader_with_augment(img, size, mean, std, mixup_img): - im_path = img['image'] - im = cv2.imread(im_path) - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - gt_boxes = img['gt_boxes'].copy() - gt_labels = img['gt_labels'].copy() - gt_scores = np.ones_like(gt_labels) - - if mixup_img: - mixup_im = cv2.imread(mixup_img['image']) - mixup_im = cv2.cvtColor(mixup_im, cv2.COLOR_BGR2RGB) - mixup_gt_boxes = np.array(mixup_img['gt_boxes']).copy() - mixup_gt_labels = np.array(mixup_img['gt_labels']).copy() - mixup_gt_scores = 
np.ones_like(mixup_gt_labels) - im, gt_boxes, gt_labels, gt_scores = \ - image_utils.image_mixup(im, gt_boxes, gt_labels, - gt_scores, mixup_im, mixup_gt_boxes, - mixup_gt_labels, mixup_gt_scores) - - im, gt_boxes, gt_labels, gt_scores = \ - image_utils.image_augment(im, gt_boxes, gt_labels, - gt_scores, size, mean) - - mean = np.array(mean).reshape((1, 1, -1)) - std = np.array(std).reshape((1, 1, -1)) - out_img = (im / 255.0 - mean) / std - out_img = out_img.astype('float32').transpose((2, 0, 1)) - - return (out_img, gt_boxes, gt_labels, gt_scores) - - def get_img_size(size, random_sizes=[]): - if len(random_sizes): - return np.random.choice(random_sizes) - return size - - def get_mixup_img(imgs, mixup_iter, total_iter, read_cnt): - if total_iter >= mixup_iter: - return None - - mixup_idx = np.random.randint(1, len(imgs)) - mixup_img = imgs[(read_cnt + mixup_idx) % len(imgs)] - return mixup_img - - def reader(): - if mode == 'train': - imgs = self._parse_images_by_mode(mode) - if shuffle: - if shuffle_seed is not None: - np.random.seed(shuffle_seed) - np.random.shuffle(imgs) - read_cnt = 0 - total_iter = 0 - batch_out = [] - img_size = get_img_size(size, random_sizes) - while True: - img = imgs[read_cnt % len(imgs)] - mixup_img = get_mixup_img(imgs, mixup_iter, total_iter, - read_cnt) - read_cnt += 1 - if read_cnt % len(imgs) == 0 and shuffle: - np.random.shuffle(imgs) - im, gt_boxes, gt_labels, gt_scores = \ - img_reader_with_augment(img, img_size, cfg.pixel_means, - cfg.pixel_stds, mixup_img) - batch_out.append([im, gt_boxes, gt_labels, gt_scores]) - - if len(batch_out) == batch_size: - yield batch_out - batch_out = [] - total_iter += 1 - img_size = get_img_size(size, random_sizes) - - elif mode == 'test': - imgs = self._parse_images_by_mode(mode) - batch_out = [] - for img in imgs: - im, im_id, im_shape = img_reader(img, size, cfg.pixel_means, - cfg.pixel_stds) - batch_out.append((im, im_id, im_shape)) - if len(batch_out) == batch_size: - yield batch_out - 
batch_out = [] - if len(batch_out) != 0: - yield batch_out - else: - img = {} - img['image'] = image - img['id'] = 0 - im, im_id, im_shape = img_reader(img, size, cfg.pixel_means, - cfg.pixel_stds) - batch_out = [(im, im_id, im_shape)] - yield batch_out - - # NOTE: yolov3 is a special model, if num_trainers > 1, each process - # trian the completed dataset. - # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - # if mode == 'train' and num_trainers > 1: - # assert shuffle_seed is not None, \ - # "If num_trainers > 1, the shuffle_seed must be set, because " \ - # "the order of batch data generated by reader " \ - # "must be the same in the respective processes." - # reader = fluid.contrib.reader.distributed_batch_reader(reader) - - return reader - - -dsr = DataSetReader() - - -def train(size=416, - batch_size=64, - shuffle=True, - shuffle_seed=None, - total_iter=0, - mixup_iter=0, - random_sizes=[], - num_workers=8, - max_queue=32, - use_multiprocess_reader=True): - generator = dsr.get_reader('train', size, batch_size, shuffle, shuffle_seed, - int(mixup_iter / num_workers), random_sizes) - - if not use_multiprocess_reader: - return generator - else: - if sys.platform == "win32": - print("multiprocess is not fully compatible with Windows, " - "you can set --use_multiprocess_reader=False if you " - "are training on Windows and there are errors incured " - "by multiprocess.") - print("multiprocess reader starting up, it takes a while...") - - def infinite_reader(): - while True: - for data in generator(): - yield data - - def reader(): - cnt = 0 - try: - enqueuer = GeneratorEnqueuer( - infinite_reader(), use_multiprocessing=use_multiprocess_reader) - enqueuer.start(max_queue_size=max_queue, workers=num_workers) - generator_out = None - while True: - while enqueuer.is_running(): - if not enqueuer.queue.empty(): - generator_out = enqueuer.queue.get() - break - else: - time.sleep(0.02) - yield generator_out - cnt += 1 - if cnt >= total_iter: - enqueuer.stop() 
- return - generator_out = None - except Exception as e: - print("Exception occured in reader: {}".format(str(e))) - finally: - if enqueuer: - enqueuer.stop() - - return reader - - -def test(size=416, batch_size=1): - return dsr.get_reader('test', size, batch_size) - - -def infer(size=416, image=None): - return dsr.get_reader('infer', size, image=image) - - -def get_label_infos(): - return dsr.get_label_infos() diff --git a/PaddleCV/yolov3/train.py b/PaddleCV/yolov3/train.py deleted file mode 100644 index 6dab2c80f5021f646b63ec55b242ab255670608e..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/train.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os - - -def set_paddle_flags(flags): - for key, value in flags.items(): - if os.environ.get(key, None) is None: - os.environ[key] = str(value) - - -set_paddle_flags({ - 'FLAGS_eager_delete_tensor_gb': 0, # enable gc - 'FLAGS_memory_fraction_of_eager_deletion': 1, - 'FLAGS_fraction_of_gpu_memory_to_use': 0.98 -}) - -import sys -import numpy as np -import random -import time -import shutil -import subprocess -from utility import (parse_args, print_arguments, - SmoothedValue, check_gpu) - -import paddle -import paddle.fluid as fluid -from paddle.fluid import profiler -import reader -from models.yolov3 import YOLOv3 -from learning_rate import exponential_with_warmup_decay -from config import cfg -import dist_utils - -num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - -def get_device_num(): - # NOTE(zcd): for multi-processe training, each process use one GPU card. 
- if num_trainers > 1: - return 1 - return fluid.core.get_cuda_device_count() - - -def train(): - - # check if set use_gpu=True in paddlepaddle cpu version - check_gpu(cfg.use_gpu) - - if cfg.debug or args.enable_ce: - fluid.default_startup_program().random_seed = 1000 - fluid.default_main_program().random_seed = 1000 - random.seed(0) - np.random.seed(0) - - if not os.path.exists(cfg.model_save_dir): - os.makedirs(cfg.model_save_dir) - - model = YOLOv3() - model.build_model() - input_size = cfg.input_size - loss = model.loss() - loss.persistable = True - - devices_num = get_device_num() if cfg.use_gpu else 1 - print("Found {} CUDA/CPU devices.".format(devices_num)) - - learning_rate = cfg.learning_rate - boundaries = cfg.lr_steps - gamma = cfg.lr_gamma - step_num = len(cfg.lr_steps) - values = [learning_rate * (gamma**i) for i in range(step_num + 1)] - - optimizer = fluid.optimizer.Momentum( - learning_rate=exponential_with_warmup_decay( - learning_rate=learning_rate, - boundaries=boundaries, - values=values, - warmup_iter=cfg.warm_up_iter, - warmup_factor=cfg.warm_up_factor), - regularization=fluid.regularizer.L2Decay(cfg.weight_decay), - momentum=cfg.momentum) - optimizer.minimize(loss) - - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - if cfg.pretrain: - if not os.path.exists(cfg.pretrain): - print("Pretrain weights not found: {}".format(cfg.pretrain)) - - def if_exist(var): - return os.path.exists(os.path.join(cfg.pretrain, var.name)) - - fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist) - - build_strategy = fluid.BuildStrategy() - build_strategy.memory_optimize = False #gc and memory optimize may conflict - syncbn = cfg.syncbn - if (syncbn and devices_num <= 1) or num_trainers > 1: - print("Disable syncbn in single device") - syncbn = False - build_strategy.sync_batch_norm = syncbn - - exec_strategy 
= fluid.ExecutionStrategy() - if cfg.use_gpu and num_trainers > 1: - dist_utils.prepare_for_multi_process(exe, build_strategy, - fluid.default_main_program()) - exec_strategy.num_threads = 1 - - compile_program = fluid.compiler.CompiledProgram(fluid.default_main_program( - )).with_data_parallel( - loss_name=loss.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - random_sizes = [cfg.input_size] - if cfg.random_shape: - random_sizes = [32 * i for i in range(10, 20)] - - total_iter = cfg.max_iter - cfg.start_iter - mixup_iter = total_iter - cfg.no_mixup_iter - - shuffle = True - if args.enable_ce: - shuffle = False - shuffle_seed = None - # NOTE: yolov3 is a special model, if num_trainers > 1, each process - # trian the completed dataset. - # if num_trainers > 1: shuffle_seed = 1 - train_reader = reader.train( - input_size, - batch_size=cfg.batch_size, - shuffle=shuffle, - shuffle_seed=shuffle_seed, - total_iter=total_iter * devices_num, - mixup_iter=mixup_iter * devices_num, - random_sizes=random_sizes, - use_multiprocess_reader=cfg.use_multiprocess_reader, - num_workers=cfg.worker_num) - py_reader = model.py_reader - py_reader.decorate_paddle_reader(train_reader) - - def save_model(postfix): - model_path = os.path.join(cfg.model_save_dir, postfix) - if os.path.isdir(model_path): - shutil.rmtree(model_path) - fluid.io.save_persistables(exe, model_path) - - fetch_list = [loss] - - py_reader.start() - smoothed_loss = SmoothedValue() - try: - start_time = time.time() - prev_start_time = start_time - snapshot_loss = 0 - snapshot_time = 0 - for iter_id in range(cfg.start_iter, cfg.max_iter): - prev_start_time = start_time - start_time = time.time() - losses = exe.run(compile_program, - fetch_list=[v.name for v in fetch_list]) - smoothed_loss.add_value(np.mean(np.array(losses[0]))) - snapshot_loss += np.mean(np.array(losses[0])) - snapshot_time += start_time - prev_start_time - lr = np.array(fluid.global_scope().find_var('learning_rate') - 
.get_tensor()) - print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format( - iter_id, lr[0], - smoothed_loss.get_mean_value(), start_time - prev_start_time)) - sys.stdout.flush() - #add profiler tools - if args.is_profiler and iter_id == 5: - profiler.start_profiler("All") - elif args.is_profiler and iter_id == 10: - profiler.stop_profiler("total", args.profiler_path) - return - - if (iter_id + 1) % cfg.snapshot_iter == 0: - save_model("model_iter{}".format(iter_id)) - print("Snapshot {} saved, average loss: {}, \ - average time: {}".format( - iter_id + 1, snapshot_loss / float(cfg.snapshot_iter), - snapshot_time / float(cfg.snapshot_iter))) - if args.enable_ce and iter_id == cfg.max_iter - 1: - if devices_num == 1: - print("kpis\ttrain_cost_1card\t%f" % - (snapshot_loss / float(cfg.snapshot_iter))) - print("kpis\ttrain_duration_1card\t%f" % - (snapshot_time / float(cfg.snapshot_iter))) - else: - print("kpis\ttrain_cost_8card\t%f" % - (snapshot_loss / float(cfg.snapshot_iter))) - print("kpis\ttrain_duration_8card\t%f" % - (snapshot_time / float(cfg.snapshot_iter))) - - snapshot_loss = 0 - snapshot_time = 0 - except fluid.core.EOFException: - py_reader.reset() - - save_model('model_final') - - -if __name__ == '__main__': - args = parse_args() - print_arguments(args) - train() diff --git a/PaddleCV/yolov3/utility.py b/PaddleCV/yolov3/utility.py deleted file mode 100644 index 9d442f4ee79b29691b84ec95b78ccfe76c4c55a0..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/utility.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -""" -Contains common utility functions. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import sys -import distutils.util -import numpy as np -import six -from collections import deque -import paddle.fluid as fluid -import argparse -import functools -from config import * - - -def print_arguments(args): - """Print argparse's arguments. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - parser.add_argument("name", default="Jonh", type=str, help="User name.") - args = parser.parse_args() - print_arguments(args) - - :param args: Input argparse.Namespace for printing. - :type args: argparse.Namespace - """ - print("----------- Configuration Arguments -----------") - for arg, value in sorted(six.iteritems(vars(args))): - print("%s: %s" % (arg, value)) - print("------------------------------------------------") - - -def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. - - Usage: - - .. code-block:: python - - parser = argparse.ArgumentParser() - add_argument("name", str, "Jonh", "User name.", parser) - args = parser.parse_args() - """ - type = distutils.util.strtobool if type == bool else type - argparser.add_argument( - "--" + argname, - default=default, - type=type, - help=help + ' Default: %(default)s.', - **kwargs) - - -class SmoothedValue(object): - """Track a series of values and provide access to smoothed values over a - window or the global series average. 
- """ - - def __init__(self): - self.loss_sum = 0.0 - self.iter_cnt = 0 - - def add_value(self, value): - self.loss_sum += np.mean(value) - self.iter_cnt += 1 - - def get_mean_value(self): - return self.loss_sum / self.iter_cnt - - -def check_gpu(use_gpu): - """ - Log error and exit when set use_gpu=True in paddlepaddle - cpu version. - """ - err = "Config use_gpu cannot be set as True while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set --use_gpu=False to run model on CPU" - - try: - if use_gpu and not fluid.is_compiled_with_cuda(): - print(err) - sys.exit(1) - except Exception as e: - pass - - -def parse_args(): - """return all args - """ - parser = argparse.ArgumentParser(description=__doc__) - add_arg = functools.partial(add_arguments, argparser=parser) - # yapf: disable - # ENV - add_arg('use_gpu', bool, True, "Whether use GPU.") - add_arg('model_save_dir', str, 'checkpoints', "The path to save model.") - add_arg('pretrain', str, 'weights/darknet53', "The pretrain model path.") - add_arg('weights', str, 'weights/yolov3', "The weights path.") - add_arg('dataset', str, 'coco2017', "Dataset: coco2014, coco2017.") - add_arg('class_num', int, 80, "Class number.") - add_arg('data_dir', str, 'dataset/coco', "The data root path.") - add_arg('start_iter', int, 0, "Start iteration.") - add_arg('use_multiprocess_reader', bool, True, "whether use multiprocess reader.") - add_arg('worker_num', int, 8, "worker number for multiprocess reader.") - #SOLVER - add_arg('batch_size', int, 8, "Mini-batch size per device.") - add_arg('learning_rate', float, 0.001, "Learning rate.") - add_arg('max_iter', int, 500200, "Iter number.") - add_arg('snapshot_iter', int, 2000, "Save model every snapshot stride.") - add_arg('label_smooth', bool, True, "Use label smooth in class label.") - add_arg('no_mixup_iter', int, 40000, "Disable mixup in last N iter.") - # TRAIN TEST INFER - add_arg('input_size', 
int, 608, "Image input size of YOLOv3.") - add_arg('syncbn', bool, True, "Whether to use synchronized batch normalization.") - add_arg('random_shape', bool, True, "Resize to random shape for train reader.") - add_arg('valid_thresh', float, 0.005, "Valid confidence score for NMS.") - add_arg('nms_thresh', float, 0.45, "NMS threshold.") - add_arg('nms_topk', int, 400, "The number of boxes to perform NMS.") - add_arg('nms_posk', int, 100, "The number of boxes of NMS output.") - add_arg('debug', bool, False, "Debug mode") - # SINGLE EVAL AND DRAW - add_arg('image_path', str, 'image', - "The image path used to inference and visualize.") - add_arg('image_name', str, None, - "The single image used to inference and visualize. None to inference all images in image_path") - add_arg('draw_thresh', float, 0.5, - "Confidence score threshold to draw prediction box in image in debug mode") - add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.") - # args for profiler tools - add_arg('is_profiler', int, 0, "the switch of profiler") - add_arg('profiler_path', str, './', "the path to save profiler output files") - # yapf: enable - args = parser.parse_args() - file_name = sys.argv[0] - merge_cfg_from_args(args) - return args diff --git a/PaddleCV/yolov3/weights/download.sh b/PaddleCV/yolov3/weights/download.sh deleted file mode 100644 index 44295ab53ef428ba664bf0b73a61e469c643690a..0000000000000000000000000000000000000000 --- a/PaddleCV/yolov3/weights/download.sh +++ /dev/null @@ -1,10 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the pretrain weights. -echo "Downloading..." -wget https://paddlemodels.bj.bcebos.com/yolo/darknet53.tar.gz -wget https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz -echo "Extracting..." -tar -xf darknet53.tar.gz -tar -xf yolov3.tar.gz