From 3529e38f87911adcc3247513341d0137c18f0bb7 Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Wed, 30 Nov 2022 13:43:55 +0800 Subject: [PATCH] [SSOD] Add DenseTeacher FCOS base codes (#7423) * add ssod denseteacher fcos codes and refine baseline doc * fix docs * fix deploy and ColorJitter float32 * fix docs * fix docs, test=document_fix --- configs/ssod/README.md | 301 ++++++++++- configs/ssod/_base_/coco_detection_full.yml | 31 ++ .../ssod/_base_/coco_detection_percent_1.yml | 31 ++ .../ssod/_base_/coco_detection_percent_10.yml | 31 ++ .../ssod/_base_/coco_detection_percent_5.yml | 31 ++ configs/ssod/baseline/README.md | 56 ++- .../faster_rcnn_r50_fpn_2x_coco_sup005.yml | 42 ++ .../faster_rcnn_r50_fpn_2x_coco_sup010.yml | 20 +- .../baseline/fcos_r50_fpn_2x_coco_sup005.yml | 2 +- .../baseline/fcos_r50_fpn_2x_coco_sup010.yml | 2 +- .../retinanet_r50_fpn_2x_coco_sup005.yml | 26 + .../retinanet_r50_fpn_2x_coco_sup010.yml | 4 +- configs/ssod/denseteacher/README.md | 78 +++ ...denseteacher_fcos_r50_fpn_coco_semi005.yml | 159 ++++++ ...denseteacher_fcos_r50_fpn_coco_semi010.yml | 164 ++++++ ...eteacher_fcos_r50_fpn_coco_semi010_lsj.yml | 44 ++ ppdet/data/reader.py | 309 ++++++++++++ ppdet/data/source/coco.py | 227 ++++++++- ppdet/data/transform/batch_operators.py | 28 +- ppdet/data/transform/operators.py | 188 ++++++- ppdet/engine/__init__.py | 4 + ppdet/engine/trainer_ssod.py | 475 ++++++++++++++++++ ppdet/modeling/__init__.py | 2 + ppdet/modeling/architectures/fcos.py | 99 +++- ppdet/modeling/heads/fcos_head.py | 22 +- ppdet/modeling/losses/fcos_loss.py | 9 +- ppdet/modeling/ssod_utils.py | 93 ++++ ppdet/optimizer/__init__.py | 5 +- ppdet/optimizer/ema.py | 48 ++ ppdet/optimizer/optimizer.py | 5 + ppdet/utils/checkpoint.py | 20 +- tools/train.py | 14 +- 32 files changed, 2507 insertions(+), 63 deletions(-) create mode 100644 configs/ssod/_base_/coco_detection_full.yml create mode 100644 configs/ssod/_base_/coco_detection_percent_1.yml create mode 100644 
configs/ssod/_base_/coco_detection_percent_10.yml create mode 100644 configs/ssod/_base_/coco_detection_percent_5.yml create mode 100644 configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup005.yml create mode 100644 configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup005.yml create mode 100644 configs/ssod/denseteacher/README.md create mode 100644 configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi005.yml create mode 100644 configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml create mode 100644 configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml create mode 100644 ppdet/engine/trainer_ssod.py create mode 100644 ppdet/modeling/ssod_utils.py diff --git a/configs/ssod/README.md b/configs/ssod/README.md index f0462882c..ab1fc9faa 100644 --- a/configs/ssod/README.md +++ b/configs/ssod/README.md @@ -5,12 +5,25 @@ ## 内容 - [简介](#简介) - [模型库](#模型库) -- [数据集准备](#数据集准备) + - [Baseline](#Baseline) + - [DenseTeacher](#DenseTeacher) +- [半监督数据集准备](#半监督数据集准备) +- [半监督检测配置](#半监督检测配置) + - [训练集配置](#训练集配置) + - [预训练配置](#预训练配置) + - [全局配置](#全局配置) + - [模型配置](#模型配置) + - [数据增强配置](#数据增强配置) + - [其他配置](#其他配置) +- [使用说明](#使用说明) + - [训练](#训练) + - [评估](#评估) + - [预测](#预测) + - [部署](#部署) - [引用](#引用) ## 简介 -半监督目标检测(SSOD)是**同时使用有标注数据和无标注数据**进行训练的目标检测,既可以极大地节省标注成本,也可以充分利用无标注数据进一步提高检测精度。 - +半监督目标检测(SSOD)是**同时使用有标注数据和无标注数据**进行训练的目标检测,既可以极大地节省标注成本,也可以充分利用无标注数据进一步提高检测精度。PaddleDetection团队复现了[DenseTeacher](denseteacher)半监督检测算法,用户可以下载使用。 ## 模型库 @@ -19,8 +32,17 @@ **纯监督数据**模型的训练和模型库,请参照[Baseline](baseline); +### [DenseTeacher](denseteacher) + +| 模型 | 基础检测器 | 监督数据比例 | Sup mAPval
0.5:0.95 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 | +| :------------: | :---------------------: | :-----------: | :-------------------------: |:---------------------------: |:--------------------: | :-------: |:---------: | +| DenseTeacher | [FCOS ResNet50-FPN](./baseline/fcos_r50_fpn_2x_coco_sup005.yml) | 5% | 21.3 | 30.6 | 240 (87120) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi005.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_semi005.yml) | +| DenseTeacher | [FCOS ResNet50-FPN](./baseline/fcos_r50_fpn_2x_coco_sup010.yml) | 10%| 26.3 | 35.1 | 240 (174240)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml) | +| DenseTeacher(LSJ)| [FCOS ResNet50-FPN](./baseline/fcos_r50_fpn_2x_coco_sup010.yml) |10%| 26.3 | 37.1 | 240 (174240)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010_lsj.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml) | +| DenseTeacher | [FCOS ResNet50-FPN](./../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml) |full| 42.6 | - | 36 (263844)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_full.pdparams) | [config](denseteacher/denseteacher_fcos_r50_fpn_coco_full.yml) | + -## 数据集准备 +## 半监督数据集准备 半监督目标检测**同时需要有标注数据和无标注数据**,且无标注数据量一般**远多于有标注数据量**。 对于COCO数据集一般有两种常规设置: @@ -81,7 +103,7 @@ with open(path, 'w') as f: -
+
解压后的数据集目录如下: ``` @@ -107,3 +129,272 @@ PaddleDetection ```
+ +## 半监督检测配置 + +配置半监督检测,需要基于选用的**基础检测器**的配置文件,如: + +```python +_BASE_: [ + '../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml', + '../_base_/coco_detection_percent_10.yml', +] +log_iter: 50 +snapshot_epoch: 5 +epochs: &epochs 240 +weights: output/denseteacher_fcos_r50_fpn_coco_semi010/model_final +``` +并依次做出如下几点改动: + +### 训练集配置 + +首先可以直接引用已经配置好的半监督训练集,如: + +```python +_BASE_: [ + '../_base_/coco_detection_percent_10.yml', +] +``` + +具体来看,构建半监督数据集,需要同时配置监督数据集`TrainDataset`和无监督数据集`UnsupTrainDataset`的路径,**注意必须选用`SemiCOCODataSet`类而不是`COCODataSet`类**,如以下所示: + +**COCO-train2017部分比例数据集**: + +```python +# partial labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@1.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# partial unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@1-unlabeled.json + dataset_dir: dataset/coco + data_fields: ['image'] + supervised: False +``` + +或者 **COCO-train2017 full全量数据集**: + +```python +# full labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# full unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: unlabeled2017 + anno_path: annotations/instances_unlabeled2017.json + dataset_dir: dataset/coco + data_fields: ['image'] + supervised: False +``` + +验证集`EvalDataset`和测试集`TestDataset`的配置**不需要更改**,且还是采用`COCODataSet`类。 + + +### 预训练配置 + +```python +### pretrain and warmup config, choose one and comment out the other +pretrain_weights: 
https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams +semi_start_iters: 5000 +ema_start_iters: 3000 +use_warmup: &use_warmup True +``` + +### 全局配置 + +需要在配置文件中添加如下全局配置: + +```python +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 4.0, distill_loss_box: 1.0, distill_loss_quality: 1.0} + concat_sup_data: True + suppress: linear + ratio: 0.01 + gamma: 2.0 + test_cfg: + inference_on: teacher +``` + +### 模型配置 + +如果没有特殊改动,则直接继承自基础检测器里的模型配置。 +以 `DenseTeacher` 为例,选择 `fcos_r50_fpn_iou_multiscale_2x_coco.yml` 作为**基础检测器**进行半监督训练,**teacher网络的结构和student网络的结构均为基础检测器的结构,且结构相同**。 + +```python +_BASE_: [ + '../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml', +] +``` + +### 数据增强配置 + +构建半监督训练集的Reader,需要在原先`TrainReader`的基础上,新增加`weak_aug`,`strong_aug`,`sup_batch_transforms`和`unsup_batch_transforms`,并且需要注意: +- **如果有`NormalizeImage`,需要单独从`sample_transforms`中抽出来放在`weak_aug`和`strong_aug`中**; +- `sample_transforms`为**公用的基础数据增强**; +- 完整的弱数据增强为`sample_transforms + weak_aug`,完整的强数据增强为`sample_transforms + strong_aug`; + +如以下所示: + +原纯监督模型的`TrainReader`: +```python +TrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1} + - RandomFlip: {} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + - Gt2FCOSTarget: + object_sizes_boundary: [64, 128, 256, 512] + center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + norm_reg_targets: True + batch_size: 2 + shuffle: True + drop_last: True +``` + +更改后的半监督TrainReader: + +```python +### reader config +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 
1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1} + - RandomFlip: {} + weak_aug: + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + sup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + - Gt2FCOSTarget: + object_sizes_boundary: [64, 128, 256, 512] + center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + num_shift: 0.5 + multiply_strides_reg_targets: False + norm_reg_targets: True + unsup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + sup_batch_size: 2 + unsup_batch_size: 2 + shuffle: True + drop_last: True +``` + +### 其他配置 + +训练epoch数需要和全量数据训练时换算总iter数保持一致,如全量训练24 epoch(换算约为180k个iter),则10%监督数据的半监督训练,总epoch数需要为240 epoch左右(换算约为180k个iter)。示例如下: + +```python +### other config +epoch: 240 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: 240 + use_warmup: True + - !LinearWarmup + start_factor: 0.001 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + clip_grad_by_value: 1.0 +``` + + +## 使用说明 + +仅训练时必须使用半监督检测的配置文件去训练,评估、预测、部署也可以按基础检测器的配置文件去执行。 + +### 训练 + +```bash +# 单卡训练 (不推荐,需按线性比例相应地调整学习率) +CUDA_VISIBLE_DEVICES=0 python tools/train.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml --eval + +# 多卡训练 +python -m paddle.distributed.launch --log_dir=denseteacher_fcos_semi010/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml --eval +``` + +### 评估 + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c 
ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=output/denseteacher_fcos_r50_fpn_coco_semi010/model_final.pdparams +``` + +### 预测 + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=output/denseteacher_fcos_r50_fpn_coco_semi010/model_final.pdparams --infer_img=demo/000000014439.jpg +``` + +### 部署 + +部署可以使用半监督检测配置文件,也可以使用基础检测器的配置文件去部署和使用。 + +```bash +# 导出模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010.pdparams + +# 导出权重预测 +CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/denseteacher_fcos_r50_fpn_coco_semi010 --image_file=demo/000000014439_640x640.jpg --device=GPU + +# 部署测速 +CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/denseteacher_fcos_r50_fpn_coco_semi010 --image_file=demo/000000014439_640x640.jpg --device=GPU --run_benchmark=True # --run_mode=trt_fp16 + +# 导出ONNX +paddle2onnx --model_dir output_inference/denseteacher_fcos_r50_fpn_coco_semi010/ --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file denseteacher_fcos_r50_fpn_coco_semi010.onnx +``` + + +## 引用 + +``` + @article{denseteacher2022, + title={Dense Teacher: Dense Pseudo-Labels for Semi-supervised Object Detection}, + author={Hongyu Zhou, Zheng Ge, Songtao Liu, Weixin Mao, Zeming Li, Haiyan Yu, Jian Sun}, + journal={arXiv preprint arXiv:2207.02541}, + year={2022} +} +``` diff --git a/configs/ssod/_base_/coco_detection_full.yml b/configs/ssod/_base_/coco_detection_full.yml new file mode 100644 index 000000000..2805f88c8 --- /dev/null +++ b/configs/ssod/_base_/coco_detection_full.yml @@ -0,0 +1,31 @@ +metric: COCO +num_classes: 80 + +# full labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + 
image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# full unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: unlabeled2017 + anno_path: annotations/instances_unlabeled2017.json + dataset_dir: dataset/coco + data_fields: ['image'] + supervised: False + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + allow_empty: true + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) + dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' diff --git a/configs/ssod/_base_/coco_detection_percent_1.yml b/configs/ssod/_base_/coco_detection_percent_1.yml new file mode 100644 index 000000000..569b8e9dc --- /dev/null +++ b/configs/ssod/_base_/coco_detection_percent_1.yml @@ -0,0 +1,31 @@ +metric: COCO +num_classes: 80 + +# partial labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@1.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# partial unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@1-unlabeled.json + dataset_dir: dataset/coco + data_fields: ['image'] + supervised: False + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + allow_empty: true + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) + dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' diff --git 
a/configs/ssod/_base_/coco_detection_percent_10.yml b/configs/ssod/_base_/coco_detection_percent_10.yml new file mode 100644 index 000000000..587460178 --- /dev/null +++ b/configs/ssod/_base_/coco_detection_percent_10.yml @@ -0,0 +1,31 @@ +metric: COCO +num_classes: 80 + +# partial labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@10.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# partial unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@10-unlabeled.json + dataset_dir: dataset/coco + data_fields: ['image'] + supervised: False + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + allow_empty: true + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) + dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' diff --git a/configs/ssod/_base_/coco_detection_percent_5.yml b/configs/ssod/_base_/coco_detection_percent_5.yml new file mode 100644 index 000000000..01d5fde1b --- /dev/null +++ b/configs/ssod/_base_/coco_detection_percent_5.yml @@ -0,0 +1,31 @@ +metric: COCO +num_classes: 80 + +# partial labeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +TrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@5.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] + +# partial unlabeled COCO, use `SemiCOCODataSet` rather than `COCODataSet` +UnsupTrainDataset: + !SemiCOCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@5-unlabeled.json + dataset_dir: dataset/coco + data_fields: 
['image'] + supervised: False + +EvalDataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + allow_empty: true + +TestDataset: + !ImageFolder + anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) + dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' diff --git a/configs/ssod/baseline/README.md b/configs/ssod/baseline/README.md index 1453b8b22..02a3fde87 100644 --- a/configs/ssod/baseline/README.md +++ b/configs/ssod/baseline/README.md @@ -4,43 +4,57 @@ ### [FCOS](../../fcos) -| 基础模型 | 监督数据比例 | mAPval
0.5:0.95 | 模型下载 | 配置文件 | -| :---------------: | :-------------: | :---------------------: |:--------: | :---------: | -| FCOS ResNet50-FPN | 5% | 21.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_2x_coco_sup005.pdparams) | [config](fcos_r50_fpn_2x_coco_sup005.yml) | -| FCOS ResNet50-FPN | 10% | 26.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_2x_coco_sup010.pdparams) | [config](fcos_r50_fpn_2x_coco_sup010.yml) | -| FCOS ResNet50-FPN | full | 42.6 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_iou_multiscale_2x_coco.pdparams) | [config](../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml) | +| 基础模型 | 监督数据比例 | Epochs (Iters) | mAPval
0.5:0.95 | 模型下载 | 配置文件 | +| :---------------: | :-------------: | :---------------: |:---------------------: |:--------: | :---------: | +| FCOS ResNet50-FPN | 5% | 24 (8712) | 21.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_2x_coco_sup005.pdparams) | [config](fcos_r50_fpn_2x_coco_sup005.yml) | +| FCOS ResNet50-FPN | 10% | 24 (17424) | 26.3 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_2x_coco_sup010.pdparams) | [config](fcos_r50_fpn_2x_coco_sup010.yml) | +| FCOS ResNet50-FPN | full | 24 (175896) | 42.6 | [download](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_iou_multiscale_2x_coco.pdparams) | [config](../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml) | + +**注意:** + - 以上模型训练默认使用8 GPUs,总batch_size默认为16,默认初始学习率为0.01。如果改动了总batch_size,请按线性比例相应地调整学习率。 ### [PP-YOLOE+](../../ppyoloe) -| 基础模型 | 监督数据比例 | mAPval
0.5:0.95 | 模型下载 | 配置文件 | -| :---------------: | :-------------: | :---------------------: |:--------: | :---------: | -| PP-YOLOE+_s | 5% | 32.8 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup005.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup005.yml) | -| PP-YOLOE+_s | 10% | 35.3 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup010.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup010.yml) | -| PP-YOLOE+_s | full | 43.7 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams) | [config](../../ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml) | +| 基础模型 | 监督数据比例 | Epochs (Iters) | mAPval
0.5:0.95 | 模型下载 | 配置文件 | +| :---------------: | :-------------: | :---------------: | :---------------------: |:--------: | :---------: | +| PP-YOLOE+_s | 5% | 80 (7200) | 32.8 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup005.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup005.yml) | +| PP-YOLOE+_s | 10% | 80 (14480) | 35.3 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup010.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup010.yml) | +| PP-YOLOE+_s | full | 80 (146560) | 43.7 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams) | [config](../../ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml) | + +**注意:** + - 以上模型训练默认使用8 GPUs,总batch_size默认为64,默认初始学习率为0.001。如果改动了总batch_size,请按线性比例相应地调整学习率。 ### [Faster R-CNN](../../faster_rcnn) -| 基础模型 | 监督数据比例 | mAPval
0.5:0.95 | 模型下载 | 配置文件 | -| :---------------: | :-------------: | :---------------------: |:--------: | :---------: | -| Faster R-CNN ResNet50-FPN | 10% | 25.6 | [download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco_sup010.pdparams) | [config](faster_rcnn_r50_fpn_2x_coco_sup010.yml) | -| Faster R-CNN ResNet50-FPN | full | 40.0 | [download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [config](../../configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml) | +| 基础模型 | 监督数据比例 | Epochs (Iters) | mAPval
0.5:0.95 | 模型下载 | 配置文件 | +| :---------------: | :-------------: | :---------------: | :---------------------: |:--------: | :---------: | +| Faster R-CNN ResNet50-FPN | 5% | 24 (8712) | 20.7 | [download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco_sup005.pdparams) | [config](faster_rcnn_r50_fpn_2x_coco_sup005.yml) | +| Faster R-CNN ResNet50-FPN | 10% | 24 (17424) | 25.6 | [download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco_sup010.pdparams) | [config](faster_rcnn_r50_fpn_2x_coco_sup010.yml) | +| Faster R-CNN ResNet50-FPN | full | 24 (175896) | 40.0 | [download](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [config](../../faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml) | +**注意:** + - 以上模型训练默认使用8 GPUs,总batch_size默认为16,默认初始学习率为0.02。如果改动了总batch_size,请按线性比例相应地调整学习率。 -### [RetinaNet](../../retinanet) -| 基础模型 | 监督数据比例 | mAPval
0.5:0.95 | 模型下载 | 配置文件 | -| :---------------: | :-------------: | :---------------------: |:--------: | :---------: | -| RetinaNet ResNet50-FPN | 10% | 23.6 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_sup010.pdparams) | [config](retinanet_r50_fpn_2x_coco_sup010.yml) | -| RetinaNet ResNet50-FPN | full | 37.5(1x) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_1x_coco.pdparams) | [config](../../configs/retinanet/retinanet_r50_fpn_1x_coco.yml) | +### [RetinaNet](../../retinanet) +| 基础模型 | 监督数据比例 | Epochs (Iters) | mAPval
0.5:0.95 | 模型下载 | 配置文件 | +| :---------------: | :-------------: | :---------------: | :---------------------: |:--------: | :---------: | +| RetinaNet ResNet50-FPN | 5% | 24 (8712) | 13.9 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_sup005.pdparams) | [config](retinanet_r50_fpn_2x_coco_sup005.yml) | +| RetinaNet ResNet50-FPN | 10% | 24 (17424) | 23.6 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_sup010.pdparams) | [config](retinanet_r50_fpn_2x_coco_sup010.yml) | +| RetinaNet ResNet50-FPN | full | 24 (175896) | 39.1 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco.pdparams) | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml) | **注意:** + - 以上模型训练默认使用8 GPUs,总batch_size默认为16,默认初始学习率为0.01。如果改动了总batch_size,请按线性比例相应地调整学习率。 + + +### 注意事项 - COCO部分监督数据集请参照 [数据集准备](../README.md) 去下载和准备,各个比例的训练集均为**从train2017中抽取部分百分比的子集**,默认使用`fold`号为1的划分子集,`sup010`表示抽取10%的监督数据训练,`sup005`表示抽取5%,`full`表示全部train2017,验证集均为val2017全量; - 抽取部分百分比的监督数据的抽法不同,或使用的`fold`号不同,精度都会因此而有约0.5 mAP之多的差异; - PP-YOLOE+ 使用Objects365预训练,其余模型均使用ImageNet预训练; - - PP-YOLOE+ 训练80 epoch,其余模型均训练24 epoch,; + - 线性比例相应调整学习率,参照公式: **lr<sub>new</sub> = lr<sub>default</sub> * (batch_size<sub>new</sub> * GPU_number<sub>new</sub>) / (batch_size<sub>default</sub> * GPU_number<sub>default</sub>)**。 ## 使用教程 @@ -57,7 +71,7 @@ weights=output/${job_name}/model_final.pdparams # 1.training # CUDA_VISIBLE_DEVICES=0 python tools/train.py -c ${config} -python -m paddle.distributed.launch --log_dir=${log_dir} --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ${config} --eval +python -m paddle.distributed.launch --log_dir=${log_dir} --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ${config} --eval --amp # 2.eval CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c ${config} -o weights=${weights} diff --git a/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup005.yml b/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup005.yml new file mode 100644 index 000000000..d0e4cf702 --- /dev/null +++ 
b/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup005.yml @@ -0,0 +1,42 @@ +_BASE_: [ + '../../faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml', +] +log_iter: 50 +snapshot_epoch: 2 +weights: output/faster_rcnn_r50_fpn_2x_coco_sup005/model_final + + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@5.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class'] + + +worker_num: 2 +TrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlip: {} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 2 + shuffle: true + drop_last: true + collate_batch: false + + +epoch: 24 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [16, 22] + - !LinearWarmup + start_factor: 0.1 + epochs: 1 diff --git a/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup010.yml b/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup010.yml index 345b083a7..80136304b 100644 --- a/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup010.yml +++ b/configs/ssod/baseline/faster_rcnn_r50_fpn_2x_coco_sup010.yml @@ -14,13 +14,29 @@ TrainDataset: data_fields: ['image', 'gt_bbox', 'gt_class'] +worker_num: 2 +TrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} + - RandomFlip: {} + - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 2 + shuffle: true + drop_last: true + collate_batch: false + + epoch: 24 LearningRate: - base_lr: 0.01 + base_lr: 0.02 schedulers: - 
!PiecewiseDecay gamma: 0.1 milestones: [16, 22] - !LinearWarmup start_factor: 0.1 - steps: 500 + epochs: 1 diff --git a/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup005.yml b/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup005.yml index a85b10429..de9982a8c 100644 --- a/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup005.yml +++ b/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup005.yml @@ -23,4 +23,4 @@ LearningRate: milestones: [16, 22] - !LinearWarmup start_factor: 0.001 - steps: 1000 + epochs: 1 diff --git a/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup010.yml b/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup010.yml index dc44de406..3636ae8bb 100644 --- a/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup010.yml +++ b/configs/ssod/baseline/fcos_r50_fpn_2x_coco_sup010.yml @@ -23,4 +23,4 @@ LearningRate: milestones: [16, 22] - !LinearWarmup start_factor: 0.001 - steps: 1000 + epochs: 1 diff --git a/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup005.yml b/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup005.yml new file mode 100644 index 000000000..d901ea26e --- /dev/null +++ b/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup005.yml @@ -0,0 +1,26 @@ +_BASE_: [ + '../../retinanet/retinanet_r50_fpn_2x_coco.yml', +] +log_iter: 50 +snapshot_epoch: 2 +weights: output/retinanet_r50_fpn_2x_coco_sup005/model_final + + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@5.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class'] + + +epoch: 24 +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [16, 22] + - !LinearWarmup + start_factor: 0.001 + epochs: 1 diff --git a/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup010.yml b/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup010.yml index 9b9cc72bc..5480f3c57 100644 --- a/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup010.yml +++ b/configs/ssod/baseline/retinanet_r50_fpn_2x_coco_sup010.yml @@ -1,5 +1,5 
@@ _BASE_: [ - '../../retinanet/retinanet_r50_fpn_1x_coco.yml', + '../../retinanet/retinanet_r50_fpn_2x_coco.yml', ] log_iter: 50 snapshot_epoch: 2 @@ -23,4 +23,4 @@ LearningRate: milestones: [16, 22] - !LinearWarmup start_factor: 0.001 - steps: 500 + epochs: 1 diff --git a/configs/ssod/denseteacher/README.md b/configs/ssod/denseteacher/README.md new file mode 100644 index 000000000..94e688b61 --- /dev/null +++ b/configs/ssod/denseteacher/README.md @@ -0,0 +1,78 @@ +简体中文 | [English](README_en.md) + +# DenseTeacher (Dense Teacher: Dense Pseudo-Labels for Semi-supervised Object Detection) + +## 模型库 + +| 模型 | 基础检测器 | 监督数据比例 | Sup mAPval
0.5:0.95 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 | +| :------------: | :---------------------: | :-----------: | :-------------------------: |:---------------------------: |:--------------------: | :-------: |:---------: | +| DenseTeacher | [FCOS R50-FPN](../baseline/fcos_r50_fpn_2x_coco_sup005.yml) | 5% | 21.3 | 30.6 | 240 (87120) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi005.pdparams) | [config](./denseteacher_fcos_r50_fpn_coco_semi005.yml) | +| DenseTeacher | [FCOS R50-FPN](../baseline/fcos_r50_fpn_2x_coco_sup010.yml) | 10%| 26.3 | 35.1 | 240 (174240)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010.pdparams) | [config](./denseteacher_fcos_r50_fpn_coco_semi010.yml) | +| DenseTeacher(LSJ)| [FCOS R50-FPN](../baseline/fcos_r50_fpn_2x_coco_sup010.yml)| 10%| 26.3 | 37.1 | 240 (174240)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010_lsj.pdparams) | [config](./denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml) | +| DenseTeacher | [FCOS R50-FPN](../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml) |full| 42.6 | - | 36 (263844)| [download](https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_full.pdparams) | [config](./denseteacher_fcos_r50_fpn_coco_full.yml) | + + +**注意:** + - 以上模型训练默认使用8 GPUs,监督数据总batch_size默认为16,无监督数据总batch_size默认也为16,默认初始学习率为0.01。如果改动了总batch_size,请按线性比例相应地调整学习率; + - `Semi Epochs (Iters)`表示**半监督训练**的模型的 Epochs (Iters),如果使用**自定义数据集**,需自行根据Iters换算到对应的Epochs调整,最好保证总Iters 和COCO数据集的设置较为接近; + - `Sup mAP`是**只使用有监督数据训练**的模型的精度,请参照**基础检测器的配置文件** 和 [baseline](../baseline); + - `Semi mAP`是**半监督训练**的模型的精度,模型下载和配置文件的链接均为**半监督模型**; + - `LSJ`表示 large-scale jittering,表示更大范围的多尺度训练,可进一步提升精度,但训练速度也会变慢; + - 半监督检测的配置和使用,请参照[文档](../README.md#半监督检测配置); + + +## 使用说明 + +仅训练时必须使用半监督检测的配置文件去训练,评估、预测、部署也可以按基础检测器的配置文件去执行。 + +### 训练 + +```bash +# 单卡训练 (不推荐,需按线性比例相应地调整学习率) +CUDA_VISIBLE_DEVICES=0 python tools/train.py -c 
ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml --eval + +# 多卡训练 +python -m paddle.distributed.launch --log_dir=denseteacher_fcos_semi010/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml --eval +``` + +### 评估 + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=output/denseteacher_fcos_r50_fpn_coco_semi010/model_final.pdparams +``` + +### 预测 + +```bash +CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=output/denseteacher_fcos_r50_fpn_coco_semi010/model_final.pdparams --infer_img=demo/000000014439.jpg +``` + +### 部署 + +部署可以使用半监督检测配置文件,也可以使用基础检测器的配置文件去部署和使用。 + +```bash +# 导出模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml -o weights=https://paddledet.bj.bcebos.com/models/denseteacher_fcos_r50_fpn_coco_semi010.pdparams + +# 导出权重预测 +CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/denseteacher_fcos_r50_fpn_coco_semi010 --image_file=demo/000000014439_640x640.jpg --device=GPU + +# 部署测速 +CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/denseteacher_fcos_r50_fpn_coco_semi010 --image_file=demo/000000014439_640x640.jpg --device=GPU --run_benchmark=True # --run_mode=trt_fp16 + +# 导出ONNX +paddle2onnx --model_dir output_inference/denseteacher_fcos_r50_fpn_coco_semi010/ --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file denseteacher_fcos_r50_fpn_coco_semi010.onnx +``` + + +## 引用 + +``` + @article{denseteacher2022, + title={Dense Teacher: Dense Pseudo-Labels for Semi-supervised Object Detection}, + author={Hongyu Zhou, Zheng Ge, Songtao Liu, Weixin Mao, Zeming Li, Haiyan Yu, Jian Sun}, + journal={arXiv preprint arXiv:2207.02541}, + year={2022} +} +``` diff --git 
a/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi005.yml b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi005.yml new file mode 100644 index 000000000..7fde22402 --- /dev/null +++ b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi005.yml @@ -0,0 +1,159 @@ +_BASE_: [ + '../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml', + '../_base_/coco_detection_percent_5.yml', +] +log_iter: 20 +snapshot_epoch: 5 +epochs: &epochs 240 # 480 will be better +weights: output/denseteacher_fcos_r50_fpn_coco_semi005/model_final + + +### pretrain and warmup config, choose one and comment out the other +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams +semi_start_iters: 5000 +ema_start_iters: 3000 +use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 4.0, distill_loss_box: 1.0, distill_loss_quality: 1.0} + concat_sup_data: True + suppress: linear + ratio: 0.01 + gamma: 2.0 + test_cfg: + inference_on: teacher + + +### reader config +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1} + - RandomFlip: {} + weak_aug: + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + sup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + - Gt2FCOSTarget: + object_sizes_boundary: [64, 128, 256, 512] 
+ center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + norm_reg_targets: True + unsup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + sup_batch_size: 2 + unsup_batch_size: 2 + shuffle: True + drop_last: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 1 + +TestReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 1 + fuse_normalize: True + + +### model config +architecture: FCOS +FCOS: + backbone: ResNet + neck: FPN + fcos_head: FCOSHead + +ResNet: + depth: 50 + variant: 'b' + norm_type: bn + freeze_at: 0 # res2 + return_idx: [1, 2, 3] + num_stages: 4 + +FPN: + out_channel: 256 + spatial_scales: [0.125, 0.0625, 0.03125] + extra_stage: 2 + has_extra_convs: True + use_c5: False + +FCOSHead: + fcos_feat: + name: FCOSFeat + feat_in: 256 + feat_out: 256 + num_convs: 4 + norm_type: "gn" + use_dcn: False + fpn_stride: [8, 16, 32, 64, 128] + prior_prob: 0.01 + norm_reg_targets: True + centerness_on_reg: True + fcos_loss: + name: FCOSLoss + loss_alpha: 0.25 + loss_gamma: 2.0 + iou_loss_type: "giou" + reg_weights: 1.0 + quality: "iou" + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.6 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [*epochs] + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 
+ type: L2 + clip_grad_by_value: 1.0 diff --git a/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml new file mode 100644 index 000000000..d6c3ac97b --- /dev/null +++ b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010.yml @@ -0,0 +1,164 @@ +_BASE_: [ + '../../fcos/fcos_r50_fpn_iou_multiscale_2x_coco.yml', + '../_base_/coco_detection_percent_10.yml', +] +log_iter: 50 +snapshot_epoch: 5 +epochs: &epochs 240 +weights: output/denseteacher_fcos_r50_fpn_coco_semi010/model_final + + +### pretrain and warmup config, choose one and comment out the other +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams +semi_start_iters: 5000 +ema_start_iters: 3000 +use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 4.0, distill_loss_box: 1.0, distill_loss_quality: 1.0} + concat_sup_data: True + suppress: linear + ratio: 0.01 + gamma: 2.0 + test_cfg: + inference_on: teacher + + +### reader config +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 1} + - RandomFlip: {} + weak_aug: + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + sup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + - Gt2FCOSTarget: + 
object_sizes_boundary: [64, 128, 256, 512] + center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + num_shift: 0. # default 0.5 + multiply_strides_reg_targets: False + norm_reg_targets: True + unsup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + sup_batch_size: 2 + unsup_batch_size: 2 + shuffle: True + drop_last: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 1 + +TestReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [800, 1333], keep_ratio: True, interp: 1} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_transforms: + - PadBatch: {pad_to_stride: 32} + batch_size: 1 + fuse_normalize: True + + +### model config +architecture: FCOS +FCOS: + backbone: ResNet + neck: FPN + fcos_head: FCOSHead + +ResNet: + depth: 50 + variant: 'b' + norm_type: bn + freeze_at: 0 # res2 + return_idx: [1, 2, 3] + num_stages: 4 + +FPN: + out_channel: 256 + spatial_scales: [0.125, 0.0625, 0.03125] + extra_stage: 2 + has_extra_convs: True + use_c5: False + +FCOSHead: + fcos_feat: + name: FCOSFeat + feat_in: 256 + feat_out: 256 + num_convs: 4 + norm_type: "gn" + use_dcn: False + fpn_stride: [8, 16, 32, 64, 128] + prior_prob: 0.01 + norm_reg_targets: True + centerness_on_reg: True + num_shift: 0. 
# default 0.5 + multiply_strides_reg_targets: False + sqrt_score: False + fcos_loss: + name: FCOSLoss + loss_alpha: 0.25 + loss_gamma: 2.0 + iou_loss_type: "giou" + reg_weights: 1.0 + quality: "iou" + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 100 + score_threshold: 0.05 + nms_threshold: 0.6 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [*epochs] + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + clip_grad_by_value: 1.0 diff --git a/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml new file mode 100644 index 000000000..32107c93f --- /dev/null +++ b/configs/ssod/denseteacher/denseteacher_fcos_r50_fpn_coco_semi010_lsj.yml @@ -0,0 +1,44 @@ +_BASE_: [ + 'denseteacher_fcos_r50_fpn_coco_semi010.yml', +] +log_iter: 50 +snapshot_epoch: 5 +epochs: &epochs 240 +weights: output/denseteacher_fcos_r50_fpn_coco_semi010_lsj/model_final + + +### reader config +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + # large-scale jittering + - RandomResize: {target_size: [[400, 1333], [1200, 1333]], keep_ratio: True, interp: 1, random_range: True} + - RandomFlip: {} + weak_aug: + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} + sup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + - Gt2FCOSTarget: + object_sizes_boundary: [64, 
128, 256, 512] + center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + num_shift: 0. # default 0.5 + multiply_strides_reg_targets: False + norm_reg_targets: True + unsup_batch_transforms: + - Permute: {} + - PadBatch: {pad_to_stride: 32} + sup_batch_size: 2 + unsup_batch_size: 2 + shuffle: True + drop_last: True diff --git a/ppdet/data/reader.py b/ppdet/data/reader.py index f04fd6b33..227fabca6 100644 --- a/ppdet/data/reader.py +++ b/ppdet/data/reader.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import os import traceback import six @@ -21,6 +22,10 @@ if sys.version_info >= (3, 0): else: pass import numpy as np +import paddle +import paddle.nn.functional as F + +from copy import deepcopy from paddle.io import DataLoader, DistributedBatchSampler from .utils import default_collate_fn @@ -300,3 +305,307 @@ class TestMOTReader(BaseDataLoader): super(TestMOTReader, self).__init__(sample_transforms, batch_transforms, batch_size, shuffle, drop_last, num_classes, **kwargs) + + +# For Semi-Supervised Object Detection (SSOD) +class Compose_SSOD(object): + def __init__(self, base_transforms, weak_aug, strong_aug, num_classes=80): + self.base_transforms = base_transforms + self.base_transforms_cls = [] + for t in self.base_transforms: + for k, v in t.items(): + op_cls = getattr(transform, k) + f = op_cls(**v) + if hasattr(f, 'num_classes'): + f.num_classes = num_classes + self.base_transforms_cls.append(f) + + self.weak_augs = weak_aug + self.weak_augs_cls = [] + for t in self.weak_augs: + for k, v in t.items(): + op_cls = getattr(transform, k) + f = op_cls(**v) + if hasattr(f, 'num_classes'): + f.num_classes = num_classes + self.weak_augs_cls.append(f) + + self.strong_augs = strong_aug + self.strong_augs_cls = [] + for t in self.strong_augs: + for k, v in t.items(): + op_cls = getattr(transform, k) + f = op_cls(**v) + if hasattr(f, 'num_classes'): + f.num_classes = 
num_classes + self.strong_augs_cls.append(f) + + def __call__(self, data): + for f in self.base_transforms_cls: + try: + data = f(data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warning("fail to map sample transform [{}] " + "with error: {} and stack:\n{}".format( + f, e, str(stack_info))) + raise e + + weak_data = deepcopy(data) + strong_data = deepcopy(data) + for f in self.weak_augs_cls: + try: + weak_data = f(weak_data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warning("fail to map weak aug [{}] " + "with error: {} and stack:\n{}".format( + f, e, str(stack_info))) + raise e + + for f in self.strong_augs_cls: + try: + strong_data = f(strong_data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warning("fail to map strong aug [{}] " + "with error: {} and stack:\n{}".format( + f, e, str(stack_info))) + raise e + + weak_data['strong_aug'] = strong_data + return weak_data + + +class BatchCompose_SSOD(Compose): + def __init__(self, transforms, num_classes=80, collate_batch=True): + super(BatchCompose_SSOD, self).__init__(transforms, num_classes) + self.collate_batch = collate_batch + + def __call__(self, data): + # split strong_data from data(weak_data) + strong_data = [] + for sample in data: + strong_data.append(sample['strong_aug']) + sample.pop('strong_aug') + + for f in self.transforms_cls: + try: + data = f(data) + strong_data = f(strong_data) + except Exception as e: + stack_info = traceback.format_exc() + logger.warning("fail to map batch transform [{}] " + "with error: {} and stack:\n{}".format( + f, e, str(stack_info))) + raise e + + # remove keys which is not needed by model + extra_key = ['h', 'w', 'flipped'] + for k in extra_key: + for sample in data: + if k in sample: + sample.pop(k) + for sample in strong_data: + if k in sample: + sample.pop(k) + + # batch data, if user-define batch function needed + # use user-defined here + if self.collate_batch: + batch_data = 
default_collate_fn(data) + strong_batch_data = default_collate_fn(strong_data) + return batch_data, strong_batch_data + else: + batch_data = {} + for k in data[0].keys(): + tmp_data = [] + for i in range(len(data)): + tmp_data.append(data[i][k]) + if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k: + tmp_data = np.stack(tmp_data, axis=0) + batch_data[k] = tmp_data + + strong_batch_data = {} + for k in strong_data[0].keys(): + tmp_data = [] + for i in range(len(strong_data)): + tmp_data.append(strong_data[i][k]) + if not 'gt_' in k and not 'is_crowd' in k and not 'difficult' in k: + tmp_data = np.stack(tmp_data, axis=0) + strong_batch_data[k] = tmp_data + + return batch_data, strong_batch_data + + +class CombineSSODLoader(object): + def __init__(self, label_loader, unlabel_loader): + self.label_loader = label_loader + self.unlabel_loader = unlabel_loader + + def __iter__(self): + while True: + try: + label_samples = next(self.label_loader_iter) + except: + self.label_loader_iter = iter(self.label_loader) + label_samples = next(self.label_loader_iter) + + try: + unlabel_samples = next(self.unlabel_loader_iter) + except: + self.unlabel_loader_iter = iter(self.unlabel_loader) + unlabel_samples = next(self.unlabel_loader_iter) + + yield ( + label_samples[0], # sup weak + label_samples[1], # sup strong + unlabel_samples[0], # unsup weak + unlabel_samples[1] # unsup strong + ) + + def __call__(self): + return self.__iter__() + + +class BaseSemiDataLoader(object): + def __init__(self, + sample_transforms=[], + weak_aug=[], + strong_aug=[], + sup_batch_transforms=[], + unsup_batch_transforms=[], + sup_batch_size=1, + unsup_batch_size=1, + shuffle=True, + drop_last=True, + num_classes=80, + collate_batch=True, + use_shared_memory=False, + **kwargs): + # sup transforms + self._sample_transforms_label = Compose_SSOD( + sample_transforms, weak_aug, strong_aug, num_classes=num_classes) + self._batch_transforms_label = BatchCompose_SSOD( + sup_batch_transforms, 
num_classes, collate_batch) + self.batch_size_label = sup_batch_size + + # unsup transforms + self._sample_transforms_unlabel = Compose_SSOD( + sample_transforms, weak_aug, strong_aug, num_classes=num_classes) + self._batch_transforms_unlabel = BatchCompose_SSOD( + unsup_batch_transforms, num_classes, collate_batch) + self.batch_size_unlabel = unsup_batch_size + + # common + self.shuffle = shuffle + self.drop_last = drop_last + self.use_shared_memory = use_shared_memory + self.kwargs = kwargs + + def __call__(self, + dataset_label, + dataset_unlabel, + worker_num, + batch_sampler_label=None, + batch_sampler_unlabel=None, + return_list=False): + # sup dataset + self.dataset_label = dataset_label + self.dataset_label.check_or_download_dataset() + self.dataset_label.parse_dataset() + self.dataset_label.set_transform(self._sample_transforms_label) + self.dataset_label.set_kwargs(**self.kwargs) + if batch_sampler_label is None: + self._batch_sampler_label = DistributedBatchSampler( + self.dataset_label, + batch_size=self.batch_size_label, + shuffle=self.shuffle, + drop_last=self.drop_last) + else: + self._batch_sampler_label = batch_sampler_label + + # unsup dataset + self.dataset_unlabel = dataset_unlabel + self.dataset_unlabel.length = self.dataset_label.__len__() + self.dataset_unlabel.check_or_download_dataset() + self.dataset_unlabel.parse_dataset() + self.dataset_unlabel.set_transform(self._sample_transforms_unlabel) + self.dataset_unlabel.set_kwargs(**self.kwargs) + if batch_sampler_unlabel is None: + self._batch_sampler_unlabel = DistributedBatchSampler( + self.dataset_unlabel, + batch_size=self.batch_size_unlabel, + shuffle=self.shuffle, + drop_last=self.drop_last) + else: + self._batch_sampler_unlabel = batch_sampler_unlabel + + # DataLoader do not start sub-process in Windows and Mac + # system, do not need to use shared memory + use_shared_memory = self.use_shared_memory and \ + sys.platform not in ['win32', 'darwin'] + # check whether shared memory size is 
bigger than 1G(1024M) + if use_shared_memory: + shm_size = _get_shared_memory_size_in_M() + if shm_size is not None and shm_size < 1024.: + logger.warning("Shared memory size is less than 1G, " + "disable shared_memory in DataLoader") + use_shared_memory = False + + self.dataloader_label = DataLoader( + dataset=self.dataset_label, + batch_sampler=self._batch_sampler_label, + collate_fn=self._batch_transforms_label, + num_workers=worker_num, + return_list=return_list, + use_shared_memory=use_shared_memory) + + self.dataloader_unlabel = DataLoader( + dataset=self.dataset_unlabel, + batch_sampler=self._batch_sampler_unlabel, + collate_fn=self._batch_transforms_unlabel, + num_workers=worker_num, + return_list=return_list, + use_shared_memory=use_shared_memory) + + self.dataloader = CombineSSODLoader(self.dataloader_label, + self.dataloader_unlabel) + self.loader = iter(self.dataloader) + return self + + def __len__(self): + return len(self._batch_sampler_label) + + def __iter__(self): + return self + + def __next__(self): + return next(self.loader) + + def next(self): + # python2 compatibility + return self.__next__() + + +@register +class SemiTrainReader(BaseSemiDataLoader): + __shared__ = ['num_classes'] + + def __init__(self, + sample_transforms=[], + weak_aug=[], + strong_aug=[], + sup_batch_transforms=[], + unsup_batch_transforms=[], + sup_batch_size=1, + unsup_batch_size=1, + shuffle=True, + drop_last=True, + num_classes=80, + collate_batch=True, + **kwargs): + super(SemiTrainReader, self).__init__( + sample_transforms, weak_aug, strong_aug, sup_batch_transforms, + unsup_batch_transforms, sup_batch_size, unsup_batch_size, shuffle, + drop_last, num_classes, collate_batch, **kwargs) diff --git a/ppdet/data/source/coco.py b/ppdet/data/source/coco.py index 80bd48a41..6578de183 100644 --- a/ppdet/data/source/coco.py +++ b/ppdet/data/source/coco.py @@ -13,6 +13,11 @@ # limitations under the License. 
import os +import copy +try: + from collections.abc import Sequence +except Exception: + from collections import Sequence import numpy as np from ppdet.core.workspace import register, serializable from .dataset import DetDataset @@ -20,6 +25,8 @@ from .dataset import DetDataset from ppdet.utils.logger import setup_logger logger = setup_logger(__name__) +__all__ = ['COCODataSet', 'SlicedCOCODataSet', 'SemiCOCODataSet'] + @register @serializable @@ -223,7 +230,8 @@ class COCODataSet(DetDataset): if self.sample_num > 0 and ct >= self.sample_num: break assert ct > 0, 'not found any coco record in %s' % (anno_path) - logger.debug('{} samples in file {}'.format(ct, anno_path)) + logger.info('Load [{} samples valid, {} samples invalid] in file {}.'. + format(ct, len(img_ids) - ct, anno_path)) if self.allow_empty and len(empty_records) > 0: empty_records = self._sample_empty(empty_records, len(records)) records += empty_records @@ -351,3 +359,220 @@ class SlicedCOCODataSet(COCODataSet): empty_records = self._sample_empty(empty_records, len(records)) records += empty_records self.roidbs = records + + +@register +@serializable +class SemiCOCODataSet(COCODataSet): + """Semi-COCODataSet used for supervised and unsupervised dataSet""" + + def __init__(self, + dataset_dir=None, + image_dir=None, + anno_path=None, + data_fields=['image'], + sample_num=-1, + load_crowd=False, + allow_empty=False, + empty_ratio=1., + repeat=1, + supervised=True): + super(SemiCOCODataSet, self).__init__( + dataset_dir, image_dir, anno_path, data_fields, sample_num, + load_crowd, allow_empty, empty_ratio, repeat) + self.supervised = supervised + self.length = -1 # defalut -1 means all + + def parse_dataset(self): + anno_path = os.path.join(self.dataset_dir, self.anno_path) + image_dir = os.path.join(self.dataset_dir, self.image_dir) + + assert anno_path.endswith('.json'), \ + 'invalid coco annotation file: ' + anno_path + from pycocotools.coco import COCO + coco = COCO(anno_path) + img_ids = 
coco.getImgIds() + img_ids.sort() + cat_ids = coco.getCatIds() + records = [] + empty_records = [] + ct = 0 + + self.catid2clsid = dict({catid: i for i, catid in enumerate(cat_ids)}) + self.cname2cid = dict({ + coco.loadCats(catid)[0]['name']: clsid + for catid, clsid in self.catid2clsid.items() + }) + + if 'annotations' not in coco.dataset or self.supervised == False: + self.load_image_only = True + logger.warning('Annotation file: {} does not contains ground truth ' + 'and load image information only.'.format(anno_path)) + + for img_id in img_ids: + img_anno = coco.loadImgs([img_id])[0] + im_fname = img_anno['file_name'] + im_w = float(img_anno['width']) + im_h = float(img_anno['height']) + + im_path = os.path.join(image_dir, + im_fname) if image_dir else im_fname + is_empty = False + if not os.path.exists(im_path): + logger.warning('Illegal image file: {}, and it will be ' + 'ignored'.format(im_path)) + continue + + if im_w < 0 or im_h < 0: + logger.warning('Illegal width: {} or height: {} in annotation, ' + 'and im_id: {} will be ignored'.format( + im_w, im_h, img_id)) + continue + + coco_rec = { + 'im_file': im_path, + 'im_id': np.array([img_id]), + 'h': im_h, + 'w': im_w, + } if 'image' in self.data_fields else {} + + if not self.load_image_only: + ins_anno_ids = coco.getAnnIds( + imgIds=[img_id], iscrowd=None if self.load_crowd else False) + instances = coco.loadAnns(ins_anno_ids) + + bboxes = [] + is_rbox_anno = False + for inst in instances: + # check gt bbox + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + else: + if not any(np.array(inst['bbox'])): + continue + + x1, y1, box_w, box_h = inst['bbox'] + x2 = x1 + box_w + y2 = y1 + box_h + eps = 1e-5 + if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps: + inst['clean_bbox'] = [ + round(float(x), 3) for x in [x1, y1, x2, y2] + ] + bboxes.append(inst) + else: + logger.warning( + 'Found an invalid bbox in annotations: im_id: {}, ' + 'area: {} x1: {}, y1: {}, x2: {}, 
y2: {}.'.format( + img_id, float(inst['area']), x1, y1, x2, y2)) + + num_bbox = len(bboxes) + if num_bbox <= 0 and not self.allow_empty: + continue + elif num_bbox <= 0: + is_empty = True + + gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32) + gt_class = np.zeros((num_bbox, 1), dtype=np.int32) + is_crowd = np.zeros((num_bbox, 1), dtype=np.int32) + gt_poly = [None] * num_bbox + + has_segmentation = False + for i, box in enumerate(bboxes): + catid = box['category_id'] + gt_class[i][0] = self.catid2clsid[catid] + gt_bbox[i, :] = box['clean_bbox'] + is_crowd[i][0] = box['iscrowd'] + # check RLE format + if 'segmentation' in box and box['iscrowd'] == 1: + gt_poly[i] = [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] + elif 'segmentation' in box and box['segmentation']: + if not np.array(box['segmentation'] + ).size > 0 and not self.allow_empty: + bboxes.pop(i) + gt_poly.pop(i) + np.delete(is_crowd, i) + np.delete(gt_class, i) + np.delete(gt_bbox, i) + else: + gt_poly[i] = box['segmentation'] + has_segmentation = True + + if has_segmentation and not any( + gt_poly) and not self.allow_empty: + continue + + gt_rec = { + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_poly': gt_poly, + } + + for k, v in gt_rec.items(): + if k in self.data_fields: + coco_rec[k] = v + + # TODO: remove load_semantic + if self.load_semantic and 'semantic' in self.data_fields: + seg_path = os.path.join(self.dataset_dir, 'stuffthingmaps', + 'train2017', im_fname[:-3] + 'png') + coco_rec.update({'semantic': seg_path}) + + logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format( + im_path, img_id, im_h, im_w)) + if is_empty: + empty_records.append(coco_rec) + else: + records.append(coco_rec) + ct += 1 + if self.sample_num > 0 and ct >= self.sample_num: + break + assert ct > 0, 'not found any coco record in %s' % (anno_path) + logger.info('Load [{} samples valid, {} samples invalid] in file {}.'. 
+ format(ct, len(img_ids) - ct, anno_path)) + if self.allow_empty and len(empty_records) > 0: + empty_records = self._sample_empty(empty_records, len(records)) + records += empty_records + self.roidbs = records + + if self.supervised: + logger.info(f'Use {len(self.roidbs)} sup_samples data as LABELED') + else: + if self.length > 0: # unsup length will be decide by sup length + all_roidbs = self.roidbs.copy() + selected_idxs = [ + np.random.choice(len(all_roidbs)) + for _ in range(self.length) + ] + self.roidbs = [all_roidbs[i] for i in selected_idxs] + logger.info( + f'Use {len(self.roidbs)} unsup_samples data as UNLABELED') + + def __getitem__(self, idx): + n = len(self.roidbs) + if self.repeat > 1: + idx %= n + # data batch + roidb = copy.deepcopy(self.roidbs[idx]) + if self.mixup_epoch == 0 or self._epoch < self.mixup_epoch: + idx = np.random.randint(n) + roidb = [roidb, copy.deepcopy(self.roidbs[idx])] + elif self.cutmix_epoch == 0 or self._epoch < self.cutmix_epoch: + idx = np.random.randint(n) + roidb = [roidb, copy.deepcopy(self.roidbs[idx])] + elif self.mosaic_epoch == 0 or self._epoch < self.mosaic_epoch: + roidb = [roidb, ] + [ + copy.deepcopy(self.roidbs[np.random.randint(n)]) + for _ in range(4) + ] + if isinstance(roidb, Sequence): + for r in roidb: + r['curr_iter'] = self._curr_iter + else: + roidb['curr_iter'] = self._curr_iter + self._curr_iter += 1 + + return self.transform(roidb) diff --git a/ppdet/data/transform/batch_operators.py b/ppdet/data/transform/batch_operators.py index 7df9cd6d6..0c48ffbd0 100644 --- a/ppdet/data/transform/batch_operators.py +++ b/ppdet/data/transform/batch_operators.py @@ -292,7 +292,9 @@ class Gt2FCOSTarget(BaseOperator): object_sizes_boundary, center_sampling_radius, downsample_ratios, - norm_reg_targets=False): + num_shift=0.5, + multiply_strides_reg_targets=False, + norm_reg_targets=True): super(Gt2FCOSTarget, self).__init__() self.center_sampling_radius = center_sampling_radius self.downsample_ratios = 
downsample_ratios @@ -304,6 +306,8 @@ class Gt2FCOSTarget(BaseOperator): self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1] ]) self.object_sizes_of_interest = object_sizes_of_interest + self.num_shift = num_shift + self.multiply_strides_reg_targets = multiply_strides_reg_targets self.norm_reg_targets = norm_reg_targets def _compute_points(self, w, h): @@ -320,7 +324,8 @@ class Gt2FCOSTarget(BaseOperator): shift_x, shift_y = np.meshgrid(shift_x, shift_y) shift_x = shift_x.flatten() shift_y = shift_y.flatten() - location = np.stack([shift_x, shift_y], axis=1) + stride // 2 + location = np.stack( + [shift_x, shift_y], axis=1) + stride * self.num_shift locations.append(location) num_points_each_level = [len(location) for location in locations] locations = np.concatenate(locations, axis=0) @@ -459,11 +464,16 @@ class Gt2FCOSTarget(BaseOperator): grid_w = int(np.ceil(w / self.downsample_ratios[lvl])) grid_h = int(np.ceil(h / self.downsample_ratios[lvl])) if self.norm_reg_targets: - sample['reg_target{}'.format(lvl)] = \ - np.reshape( - reg_targets_by_level[lvl] / \ - self.downsample_ratios[lvl], + if self.multiply_strides_reg_targets: + sample['reg_target{}'.format(lvl)] = np.reshape( + reg_targets_by_level[lvl], newshape=[grid_h, grid_w, 4]) + else: + sample['reg_target{}'.format(lvl)] = \ + np.reshape( + reg_targets_by_level[lvl] / \ + self.downsample_ratios[lvl], + newshape=[grid_h, grid_w, 4]) else: sample['reg_target{}'.format(lvl)] = np.reshape( reg_targets_by_level[lvl], @@ -575,9 +585,9 @@ class Gt2GFLTarget(BaseOperator): gt_bboxes, gt_bboxes_ignore, gt_labels) - vlr_region = self.assigner.get_vlr_region(grid_cells, num_level_cells, - gt_bboxes, gt_bboxes_ignore, - gt_labels) + vlr_region = self.assigner.get_vlr_region( + grid_cells, num_level_cells, gt_bboxes, gt_bboxes_ignore, + gt_labels) pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample( assign_gt_inds, gt_bboxes) diff --git a/ppdet/data/transform/operators.py 
b/ppdet/data/transform/operators.py index d00b26f38..b706a43ba 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -40,6 +40,7 @@ import pickle import threading MUTEX = threading.Lock() +import paddle from ppdet.core.workspace import serializable from ..reader import Compose @@ -926,6 +927,7 @@ class RandomResize(BaseOperator): target_size, keep_ratio=True, interp=cv2.INTER_LINEAR, + random_range=False, random_size=True, random_interp=False): """ @@ -934,6 +936,8 @@ class RandomResize(BaseOperator): target_size (int, list, tuple): image target size, if random size is True, must be list or tuple keep_ratio (bool): whether keep_raio or not, default true interp (int): the interpolation method + random_range (bool): whether random select target size of image, the target_size must be + a [[min_short_edge, long_edge], [max_short_edge, long_edge]] random_size (bool): whether random select target size of image random_interp (bool): whether random select interpolation method """ @@ -949,21 +953,33 @@ class RandomResize(BaseOperator): ] assert isinstance(target_size, ( Integral, Sequence)), "target_size must be Integer, List or Tuple" - if random_size and not isinstance(target_size, Sequence): + if (random_range or random_size) and not isinstance(target_size, + Sequence): raise TypeError( - "Type of target_size is invalid when random_size is True. Must be List or Tuple, now is {}". + "Type of target_size is invalid when random_size or random_range is True. Must be List or Tuple, now is {}". format(type(target_size))) + if random_range and not len(target_size) == 2: + raise TypeError( + "target_size must be two list as [[min_short_edge, long_edge], [max_short_edge, long_edge]] when random_range is True." + ) self.target_size = target_size + self.random_range = random_range self.random_size = random_size self.random_interp = random_interp def apply(self, sample, context=None): """ Resize the image numpy. 
""" - if self.random_size: - target_size = random.choice(self.target_size) + if self.random_range: + short_edge = np.random.randint(self.target_size[0][0], + self.target_size[1][0] + 1) + long_edge = max(self.target_size[0][1], self.target_size[1][1] + 1) + target_size = [short_edge, long_edge] else: - target_size = self.target_size + if self.random_size: + target_size = random.choice(self.target_size) + else: + target_size = self.target_size if self.random_interp: interp = random.choice(self.interps) @@ -3457,3 +3473,165 @@ class RandomShift(BaseOperator): sample['gt_bbox'] = gt_bbox sample['gt_class'] = gt_class return sample + + +@register_op +class StrongAugImage(BaseOperator): + def __init__(self, transforms): + super(StrongAugImage, self).__init__() + self.transforms = Compose(transforms) + + def apply(self, sample, context=None): + im = sample + im['image'] = sample['image'].astype('uint8') + results = self.transforms(im) + sample['image'] = results['image'].astype('uint8') + return sample + + +@register_op +class RandomColorJitter(BaseOperator): + def __init__(self, + prob=0.8, + brightness=0.4, + contrast=0.4, + saturation=0.4, + hue=0.1): + super(RandomColorJitter, self).__init__() + self.prob = prob + self.brightness = brightness + self.contrast = contrast + self.saturation = saturation + self.hue = hue + + def apply(self, sample, context=None): + if np.random.uniform(0, 1) < self.prob: + from paddle.vision.transforms import ColorJitter + transform = ColorJitter(self.brightness, self.contrast, + self.saturation, self.hue) + sample['image'] = transform(sample['image'].astype(np.uint8)) + sample['image'] = sample['image'].astype(np.float32) + return sample + + +@register_op +class RandomGrayscale(BaseOperator): + def __init__(self, prob=0.2): + super(RandomGrayscale, self).__init__() + self.prob = prob + + def apply(self, sample, context=None): + if np.random.uniform(0, 1) < self.prob: + from paddle.vision.transforms import Grayscale + transform = 
@register_op
class RandomErasing(BaseOperator):
    """Randomly overwrite one rectangular region of ``sample['image']``.

    The image is indexed as ``img[row, col, ...]`` and its channel count is
    read from the last axis, i.e. the operator works on channel-last arrays.

    Args:
        prob (float): probability of erasing anything in a given call.
        scale (tuple|list): (min, max) fraction of the image area to erase,
            both within [0, 1].
        ratio (tuple|list): (min, max) aspect ratio (height / width) of the
            erased rectangle. ``min`` must be strictly positive because the
            ratio is sampled uniformly in log space.
        value (Number|str|tuple|list): fill value. A number or a sequence of
            length 1 or 3 is broadcast over the rectangle; the string
            ``"random"`` fills it with ``normal(0, 1) * 255`` noise.
        inplace (bool): write into the input array instead of a copy.
        max_attempts (int): how many rectangles to sample before giving up
            when the sampled rectangle does not fit inside the image.
            The default of 1 reproduces the previous single-try behaviour.
    """

    def __init__(self,
                 prob=0.5,
                 scale=(0.02, 0.33),
                 ratio=(0.3, 3.3),
                 value=0,
                 inplace=False,
                 max_attempts=1):
        super(RandomErasing, self).__init__()
        assert isinstance(scale,
                          (tuple, list)), "scale should be a tuple or list"
        assert (scale[0] >= 0 and scale[1] <= 1 and scale[0] <= scale[1]
                ), "scale should be of kind (min, max) and in range [0, 1]"
        assert isinstance(ratio,
                          (tuple, list)), "ratio should be a tuple or list"
        # ratio[0] == 0 used to pass this check and then fail at apply time:
        # log(0) is -inf, which is not a valid bound for np.random.uniform.
        assert (ratio[0] > 0 and
                ratio[0] <= ratio[1]), "ratio should be of kind (min, max)"
        assert isinstance(
            value, (Number, str, tuple,
                    list)), "value should be a number, tuple, list or str"
        if isinstance(value, str) and value != "random":
            raise ValueError("value must be 'random' when type is str")
        self.prob = prob
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace
        self.max_attempts = max_attempts

    def _erase(self, img, i, j, h, w, v, inplace=False):
        """Return ``img`` with rows ``i:i+h`` and columns ``j:j+w`` set to ``v``."""
        if not inplace:
            img = img.copy()
        img[i:i + h, j:j + w, ...] = v
        return img

    def _get_param(self, img, scale, ratio, value):
        """Sample ``(top, left, erase_h, erase_w, fill)`` for one erase.

        When no sampled rectangle fits strictly inside the image, the
        returned parameters cover the whole image with the image itself,
        which makes the subsequent ``_erase`` a no-op.
        """
        # Only the shape is needed; do not cast (and thereby copy) the image.
        shape = np.asarray(img).shape
        h, w, c = shape[-3], shape[-2], shape[-1]
        img_area = h * w
        log_ratio = np.log(ratio)
        for _ in range(self.max_attempts):
            erase_area = np.random.uniform(*scale) * img_area
            aspect_ratio = np.exp(np.random.uniform(*log_ratio))
            erase_h = int(round(np.sqrt(erase_area * aspect_ratio)))
            erase_w = int(round(np.sqrt(erase_area / aspect_ratio)))
            if erase_h >= h or erase_w >= w:
                continue

            if value is None:
                # "random" mode: per-pixel gaussian noise scaled to 0..255.
                v = np.random.normal(size=[erase_h, erase_w, c]) * 255
            else:
                v = np.array(value)[None, None, :]
            top = np.random.randint(0, h - erase_h + 1)
            left = np.random.randint(0, w - erase_w + 1)
            return top, left, erase_h, erase_w, v
        return 0, 0, h, w, img

    def apply(self, sample, context=None):
        """Erase a random rectangle of ``sample['image']`` with probability ``prob``."""
        if random.random() >= self.prob:
            return sample

        if isinstance(self.value, Number):
            value = [self.value]
        elif isinstance(self.value, str):
            # The only string accepted by __init__ is "random".
            value = None
        else:
            value = self.value
        if value is not None and len(value) not in (1, 3):
            raise ValueError(
                "Value should be a single number or a sequence with length equals to image's channel."
            )

        im = sample['image']
        top, left, erase_h, erase_w, v = self._get_param(im, self.scale,
                                                         self.ratio, value)
        sample['image'] = self._erase(im, top, left, erase_h, erase_w, v,
                                      self.inplace)
        return sample
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import copy +import time +import typing +import math +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.distributed as dist +from paddle.distributed import fleet +from ppdet.optimizer import ModelEMA, SimpleModelEMA + +from ppdet.core.workspace import create +from ppdet.utils.checkpoint import load_weight, load_pretrain_weight +import ppdet.utils.stats as stats +from ppdet.utils import profiler +from ppdet.modeling.ssod_utils import align_weak_strong_shape +from .trainer import Trainer + +from ppdet.utils.logger import setup_logger +logger = setup_logger('ppdet.engine') + +__all__ = ['Trainer_DenseTeacher'] + + +class Trainer_DenseTeacher(Trainer): + def __init__(self, cfg, mode='train'): + self.cfg = cfg + assert mode.lower() in ['train', 'eval', 'test'], \ + "mode should be 'train', 'eval' or 'test'" + self.mode = mode.lower() + self.optimizer = None + self.is_loaded_weights = False + self.use_amp = self.cfg.get('amp', False) + self.amp_level = self.cfg.get('amp_level', 'O1') + self.custom_white_list = self.cfg.get('custom_white_list', None) + self.custom_black_list = self.cfg.get('custom_black_list', None) + + # build data loader + capital_mode = self.mode.capitalize() + self.dataset = self.cfg['{}Dataset'.format(capital_mode)] = create( + '{}Dataset'.format(capital_mode))() + + if self.mode == 'train': + self.dataset_unlabel = self.cfg['UnsupTrainDataset'] = create( + 'UnsupTrainDataset') + self.loader = create('SemiTrainReader')( + self.dataset, self.dataset_unlabel, cfg.worker_num) + + # build model + if 'model' not in self.cfg: + self.model = create(cfg.architecture) + else: + self.model = self.cfg.model + self.is_loaded_weights = True + + # EvalDataset build with BatchSampler to evaluate in 
single device + # TODO: multi-device evaluate + if self.mode == 'eval': + self._eval_batch_sampler = paddle.io.BatchSampler( + self.dataset, batch_size=self.cfg.EvalReader['batch_size']) + # If metric is VOC, need to be set collate_batch=False. + if cfg.metric == 'VOC': + cfg['EvalReader']['collate_batch'] = False + self.loader = create('EvalReader')(self.dataset, cfg.worker_num, + self._eval_batch_sampler) + # TestDataset build after user set images, skip loader creation here + + # build optimizer in train mode + if self.mode == 'train': + steps_per_epoch = len(self.loader) + if steps_per_epoch < 1: + logger.warning( + "Samples in dataset are less than batch_size, please set smaller batch_size in TrainReader." + ) + self.lr = create('LearningRate')(steps_per_epoch) + self.optimizer = create('OptimizerBuilder')(self.lr, self.model) + + # Unstructured pruner is only enabled in the train mode. + if self.cfg.get('unstructured_prune'): + self.pruner = create('UnstructuredPruner')(self.model, + steps_per_epoch) + if self.use_amp and self.amp_level == 'O2': + self.model, self.optimizer = paddle.amp.decorate( + models=self.model, + optimizers=self.optimizer, + level=self.amp_level) + + self.use_ema = ('use_ema' in cfg and cfg['use_ema']) + if self.use_ema: + ema_decay = self.cfg.get('ema_decay', 0.9998) + ema_decay_type = self.cfg.get('ema_decay_type', 'threshold') + cycle_epoch = self.cfg.get('cycle_epoch', -1) + ema_black_list = self.cfg.get('ema_black_list', None) + self.ema = ModelEMA( + self.model, + decay=ema_decay, + ema_decay_type=ema_decay_type, + cycle_epoch=cycle_epoch, + ema_black_list=ema_black_list) + self.ema_start_iters = self.cfg.get('ema_start_iters', 0) + + # simple_ema for SSOD + self.use_simple_ema = ('use_simple_ema' in cfg and + cfg['use_simple_ema']) + if self.use_simple_ema: + self.use_ema = True + ema_decay = self.cfg.get('ema_decay', 0.9996) + self.ema = SimpleModelEMA(self.model, decay=ema_decay) + self.ema_start_iters = 
def resume_weights(self, weights, exchange=True):
    """Resume model (and, when enabled, EMA) state from a checkpoint.

    Args:
        weights (str): checkpoint path or URL handed to ``load_weight``.
        exchange (bool): forwarded to ``load_weight``; when True the
            ``.pdparams`` and ``.pdema`` files are swapped on load.

    Sets ``self.start_epoch`` to the value returned by ``load_weight``.
    """
    # support Distill resume weights
    if hasattr(self.model, 'student_model'):
        # load_weight's 4th positional parameter is ``ema``; ``exchange``
        # must be passed by keyword or it is mistaken for the EMA object.
        self.start_epoch = load_weight(
            self.model.student_model,
            weights,
            self.optimizer,
            exchange=exchange)
    else:
        self.start_epoch = load_weight(
            self.model,
            weights,
            self.optimizer,
            ema=self.ema if self.use_ema else None,
            exchange=exchange)
    logger.debug("Resume weights of epoch {}".format(self.start_epoch))
'epoch_id': self.start_epoch, + 'step_id': 0, + 'steps_per_epoch': len(self.loader) + }) + + self.status['batch_time'] = stats.SmoothedValue( + self.cfg.log_iter, fmt='{avg:.4f}') + self.status['data_time'] = stats.SmoothedValue( + self.cfg.log_iter, fmt='{avg:.4f}') + self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter) + + if self.cfg.get('print_flops', False): + flops_loader = create('{}Reader'.format(self.mode.capitalize()))( + self.dataset, self.cfg.worker_num) + self._flops(flops_loader) + profiler_options = self.cfg.get('profiler_options', None) + self._compose_callback.on_train_begin(self.status) + + train_cfg = self.cfg.DenseTeacher['train_cfg'] + concat_sup_data = train_cfg.get('concat_sup_data', True) + + for param in self.ema.model.parameters(): + param.stop_gradient = True + + for epoch_id in range(self.start_epoch, self.cfg.epoch): + self.status['mode'] = 'train' + self.status['epoch_id'] = epoch_id + self._compose_callback.on_epoch_begin(self.status) + self.loader.dataset_label.set_epoch(epoch_id) + self.loader.dataset_unlabel.set_epoch(epoch_id) + iter_tic = time.time() + loss_dict = { + 'loss': paddle.to_tensor([0]), + 'loss_sup_sum': paddle.to_tensor([0]), + 'loss_unsup_sum': paddle.to_tensor([0]), + 'fg_sum': paddle.to_tensor([0]), + } + if self._nranks > 1: + for k in self.model._layers.get_loss_keys(): + loss_dict.update({k: paddle.to_tensor([0.])}) + for k in self.model._layers.get_loss_keys(): + loss_dict.update({'distill_' + k: paddle.to_tensor([0.])}) + else: + for k in self.model.get_loss_keys(): + loss_dict.update({k: paddle.to_tensor([0.])}) + for k in self.model.get_loss_keys(): + loss_dict.update({'distill_' + k: paddle.to_tensor([0.])}) + + # Note: for step_id, data in enumerate(self.loader): # enumerate bug + for step_id in range(len(self.loader)): + data = next(self.loader) + + self.model.train() + self.ema.model.eval() + data_sup_w, data_sup_s, data_unsup_w, data_unsup_s = data + + 
self.status['data_time'].update(time.time() - iter_tic) + self.status['step_id'] = step_id + profiler.add_profiler_step(profiler_options) + self._compose_callback.on_step_begin(self.status) + + if data_sup_w['image'].shape != data_sup_s['image'].shape: + data_sup_w, data_sup_s = align_weak_strong_shape(data_sup_w, + data_sup_s) + + if concat_sup_data: + for k, v in data_sup_s.items(): + data_sup_s[k] = paddle.concat([v, data_sup_w[k]]) + loss_dict_sup = self.model(data_sup_s) + else: + loss_dict_sup_w = self.model(data_sup_w) + loss_dict_sup = self.model(data_sup_s) + for k, v in loss_dict_sup_w.items(): + loss_dict_sup[k] = (loss_dict_sup[k] + v) * 0.5 + + data_sup_w['epoch_id'] = epoch_id + data_sup_s['epoch_id'] = epoch_id + losses_sup = loss_dict_sup['loss'] * train_cfg['sup_weight'] + losses_sup.backward() + + losses = losses_sup.detach() + loss_dict.update(loss_dict_sup) + loss_dict.update({'loss_sup_sum': loss_dict['loss']}) + + curr_iter = len(self.loader) * epoch_id + step_id + st_iter = self.semi_start_iters + if curr_iter == st_iter: + logger.info("***" * 30) + logger.info('Semi starting ...') + logger.info("***" * 30) + if curr_iter > st_iter: + unsup_weight = train_cfg['unsup_weight'] + if train_cfg['suppress'] == 'linear': + tar_iter = st_iter * 2 + if curr_iter <= tar_iter: + unsup_weight *= (curr_iter - st_iter) / st_iter + elif train_cfg['suppress'] == 'exp': + tar_iter = st_iter + 2000 + if curr_iter <= tar_iter: + scale = np.exp((curr_iter - tar_iter) / 1000) + unsup_weight *= scale + elif train_cfg['suppress'] == 'step': + tar_iter = st_iter * 2 + if curr_iter <= tar_iter: + unsup_weight *= 0.25 + else: + raise ValueError + + if data_unsup_w['image'].shape != data_unsup_s[ + 'image'].shape: + data_unsup_w, data_unsup_s = align_weak_strong_shape( + data_unsup_w, data_unsup_s) + + data_unsup_w['epoch_id'] = epoch_id + data_unsup_s['epoch_id'] = epoch_id + + data_unsup_s['get_data'] = True + student_preds = self.model(data_unsup_s) + + with 
paddle.no_grad(): + data_unsup_w['is_teacher'] = True + teacher_preds = self.ema.model(data_unsup_w) + + if self._nranks > 1: + loss_dict_unsup = self.model._layers.get_distill_loss( + student_preds, + teacher_preds, + ratio=train_cfg['ratio']) + else: + loss_dict_unsup = self.model.get_distill_loss( + student_preds, + teacher_preds, + ratio=train_cfg['ratio']) + + fg_num = loss_dict_unsup["fg_sum"] + del loss_dict_unsup["fg_sum"] + distill_weights = train_cfg['loss_weight'] + loss_dict_unsup = { + k: v * distill_weights[k] + for k, v in loss_dict_unsup.items() + } + + losses_unsup = sum([ + metrics_value + for metrics_value in loss_dict_unsup.values() + ]) * unsup_weight + losses_unsup.backward() + + loss_dict.update(loss_dict_unsup) + loss_dict.update({'loss_unsup_sum': losses_unsup}) + losses += losses_unsup.detach() + loss_dict.update({"fg_sum": fg_num}) + loss_dict['loss'] = losses + + self.optimizer.step() + curr_lr = self.optimizer.get_lr() + self.lr.step() + self.optimizer.clear_grad() + self.status['learning_rate'] = curr_lr + if self._nranks < 2 or self._local_rank == 0: + self.status['training_staus'].update(loss_dict) + + self.status['batch_time'].update(time.time() - iter_tic) + self._compose_callback.on_step_end(self.status) + # Note: ema_start_iters + if self.use_ema and curr_iter == self.ema_start_iters: + logger.info("***" * 30) + logger.info('EMA starting ...') + logger.info("***" * 30) + self.ema.update(self.model, decay=0) + elif self.use_ema and curr_iter > self.ema_start_iters: + self.ema.update(self.model) + iter_tic = time.time() + + is_snapshot = (self._nranks < 2 or self._local_rank == 0) \ + and ((epoch_id + 1) % self.cfg.snapshot_epoch == 0 or epoch_id == self.end_epoch - 1) + if is_snapshot and self.use_ema: + # apply ema weight on model + weight = copy.deepcopy(self.ema.model.state_dict()) + for k, v in weight.items(): + if paddle.is_floating_point(v): + weight[k].stop_gradient = True + self.status['weight'] = weight + + 
def _eval_with_loader(self, loader):
    """Run one evaluation pass over ``loader`` and log the metrics.

    The evaluated network is chosen by
    ``cfg.DenseTeacher['test_cfg']['inference_on']``: ``'teacher'`` selects
    the EMA copy (``self.ema.model``), any other value selects
    ``self.model``.

    Args:
        loader: iterable yielding batches; each batch is either a mapping
            with an ``'im_id'`` entry or a sequence of such mappings.
    """
    sample_num = 0
    tic = time.time()
    self._compose_callback.on_epoch_begin(self.status)
    self.status['mode'] = 'eval'

    # Pick teacher (EMA) or student network according to the test config.
    test_cfg = self.cfg.DenseTeacher['test_cfg']
    if test_cfg['inference_on'] == 'teacher':
        logger.info("***** teacher model evaluating *****")
        eval_model = self.ema.model
    else:
        logger.info("***** student model evaluating *****")
        eval_model = self.model

    eval_model.eval()
    if self.cfg.get('print_flops', False):
        flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
            self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
        self._flops(flops_loader)
    for step_id, data in enumerate(loader):
        self.status['step_id'] = step_id
        self._compose_callback.on_step_begin(self.status)
        # forward
        if self.use_amp:
            with paddle.amp.auto_cast(
                    enable=self.cfg.use_gpu or self.cfg.use_mlu,
                    custom_white_list=self.custom_white_list,
                    custom_black_list=self.custom_black_list,
                    level=self.amp_level):
                outs = eval_model(data)
        else:
            outs = eval_model(data)

        # update metrics
        for metric in self._metrics:
            metric.update(data, outs)

        # multi-scale inputs: all inputs have same im_id
        if isinstance(data, typing.Sequence):
            sample_num += data[0]['im_id'].numpy().shape[0]
        else:
            sample_num += data['im_id'].numpy().shape[0]
        self._compose_callback.on_step_end(self.status)

    # Expose totals to the callbacks through the shared status dict.
    self.status['sample_num'] = sample_num
    self.status['cost_time'] = time.time() - tic

    # accumulate metric to log out
    for metric in self._metrics:
        metric.accumulate()
        metric.log()
    self._compose_callback.on_epoch_end(self.status)
    # reset metric states, since evaluation may be performed multiple times
    self._reset_metrics()
def get_distill_loss(self,
                     fcos_head_outs,
                     teacher_fcos_head_outs,
                     ratio=0.01):
    """Compute the dense distillation losses between student and teacher.

    Args:
        fcos_head_outs (list): student ``[logits, deltas, quality]``, each
            a list of per-level 4-D tensors with channels on axis 1.
        teacher_fcos_head_outs (list): teacher outputs, same layout.
        ratio (float): fraction of all flattened locations, ranked by the
            teacher's highest class probability, used as the selected region.

    Returns:
        dict: ``distill_loss_cls``, ``distill_loss_box``,
        ``distill_loss_quality`` and ``fg_sum`` (sum of the selected
        teacher scores, also used to normalise the classification loss).
    """

    def _flatten_levels(level_maps, channels):
        # Move channels last on every level, then stack all locations.
        rows = [
            fmap.transpose([0, 2, 3, 1]).reshape([-1, channels])
            for fmap in level_maps
        ]
        return paddle.concat(rows, axis=0)

    s_logits, s_deltas, s_quality = fcos_head_outs
    t_logits, t_deltas, t_quality = teacher_fcos_head_outs
    nc = s_logits[0].shape[1]

    student_logits = _flatten_levels(s_logits, nc)
    teacher_logits = _flatten_levels(t_logits, nc)
    student_deltas = _flatten_levels(s_deltas, 4)
    teacher_deltas = _flatten_levels(t_deltas, 4)
    student_quality = _flatten_levels(s_quality, 1)
    teacher_quality = _flatten_levels(t_quality, 1)

    with paddle.no_grad():
        # Region Selection
        total = teacher_logits.shape[0]
        count_num = int(total * ratio)
        teacher_probs = F.sigmoid(teacher_logits)
        max_vals = paddle.max(teacher_probs, 1)
        sorted_vals, sorted_inds = paddle.topk(max_vals, total)
        mask = paddle.zeros_like(max_vals)
        mask[sorted_inds[:count_num]] = 1.
        fg_num = sorted_vals[:count_num].sum()
        b_mask = mask > 0

    # distill_loss_cls
    cls_sum = QFLv2(
        F.sigmoid(student_logits),
        teacher_probs,
        weight=mask,
        reduction="sum")
    loss_logits = cls_sum / fg_num

    # distill_loss_box
    picked_student = student_deltas[b_mask]
    picked_teacher = teacher_deltas[b_mask]
    inputs = paddle.concat(
        (-picked_student[..., :2], picked_student[..., 2:]), axis=-1)
    targets = paddle.concat(
        (-picked_teacher[..., :2], picked_teacher[..., 2:]), axis=-1)
    iou_loss = GIoULoss(reduction='mean')
    loss_deltas = iou_loss(inputs, targets)

    # distill_loss_quality
    loss_quality = F.binary_cross_entropy(
        F.sigmoid(student_quality[b_mask]),
        F.sigmoid(teacher_quality[b_mask]),
        reduction='mean')

    return {
        "distill_loss_cls": loss_logits,
        "distill_loss_box": loss_deltas,
        "distill_loss_quality": loss_quality,
        "fg_sum": fg_num,
    }
-151,11 +152,13 @@ class FCOSHead(nn.Layer): self.fcos_loss = fcos_loss self.norm_reg_targets = norm_reg_targets self.centerness_on_reg = centerness_on_reg + self.multiply_strides_reg_targets = multiply_strides_reg_targets self.num_shift = num_shift self.nms = nms if isinstance(self.nms, MultiClassNMS) and trt: self.nms.trt = trt self.sqrt_score = sqrt_score + self.is_teacher = False conv_cls_name = "fcos_head_cls" bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob) @@ -246,16 +249,29 @@ class FCOSHead(nn.Layer): centerness = self.fcos_head_centerness(fcos_cls_feat) if self.norm_reg_targets: bbox_reg = F.relu(bbox_reg) - if not self.training: - # eval or infer + if self.multiply_strides_reg_targets: bbox_reg = bbox_reg * fpn_stride + else: + if not self.training or targets.get( + 'get_data', + False) or targets.get('is_teacher', False): + bbox_reg = bbox_reg * fpn_stride else: bbox_reg = paddle.exp(bbox_reg) cls_logits_list.append(cls_logits) bboxes_reg_list.append(bbox_reg) centerness_list.append(centerness) - if self.training: + if targets is not None: + self.is_teacher = targets.get('is_teacher', False) + if self.is_teacher: + return [cls_logits_list, bboxes_reg_list, centerness_list] + + if self.training and targets is not None: + get_data = targets.get('get_data', False) + if get_data: + return [cls_logits_list, bboxes_reg_list, centerness_list] + losses = {} fcos_head_outs = [cls_logits_list, bboxes_reg_list, centerness_list] losses_fcos = self.get_loss(fcos_head_outs, targets) diff --git a/ppdet/modeling/losses/fcos_loss.py b/ppdet/modeling/losses/fcos_loss.py index 0cd6b581b..6ff52bc2a 100644 --- a/ppdet/modeling/losses/fcos_loss.py +++ b/ppdet/modeling/losses/fcos_loss.py @@ -217,12 +217,11 @@ class FCOSLoss(nn.Layer): mask_positive_float = paddle.squeeze(mask_positive_float, axis=-1) tag_center_flatten = paddle.squeeze(tag_center_flatten, axis=-1) reg_loss = self.__iou_loss( - bboxes_reg_flatten, # [61570, 4] + bboxes_reg_flatten, 
tag_bboxes_flatten, - mask_positive_float, # [61570] sum 57 - weights=tag_center_flatten - ) # [61570] tag_center_flatten.sum()=34.43262482 - reg_loss = reg_loss * mask_positive_float / normalize_sum # 34.43262482 + mask_positive_float, + weights=tag_center_flatten) + reg_loss = reg_loss * mask_positive_float / normalize_sum # 3. centerness: sigmoid_cross_entropy_with_logits_loss centerness_flatten = paddle.squeeze(centerness_flatten, axis=-1) diff --git a/ppdet/modeling/ssod_utils.py b/ppdet/modeling/ssod_utils.py new file mode 100644 index 000000000..a0c0a95b1 --- /dev/null +++ b/ppdet/modeling/ssod_utils.py @@ -0,0 +1,93 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _resize_batch_to(data, target_hw):
    """Resize ``data['image']`` to ``target_hw`` and rescale its boxes in place."""
    cur_h, cur_w = data['image'].shape[2], data['image'].shape[3]
    scale_h = target_hw[0] / cur_h
    scale_w = target_hw[1] / cur_w
    if scale_h == 1 and scale_w == 1:
        return

    data['image'] = F.interpolate(
        data['image'], size=target_hw, mode='bilinear', align_corners=False)
    if 'gt_bbox' in data:
        gt_bboxes = data['gt_bbox']
        for i in range(len(gt_bboxes)):
            if len(gt_bboxes[i]) > 0:
                # Columns 0/2 are scaled by the width ratio and columns 1/3
                # by the height ratio.
                # NOTE(review): assumes box rows are [x1, y1, x2, y2] - confirm.
                gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_w
                gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_h
        data['gt_bbox'] = gt_bboxes


def align_weak_strong_shape(data_weak, data_strong):
    """Bring the weak and strong batches to one common spatial size.

    The target size is the element-wise maximum of the two images' axes 2
    and 3, which ``F.interpolate`` receives as ``size`` (height, width).
    Whichever batch is smaller is bilinearly resized, and its ``gt_bbox``
    entries, if present, are rescaled by the matching per-axis factor.

    The previous implementation multiplied the x columns by the axis-2
    (height) factor and the y columns by the axis-3 (width) factor; the
    factors are now applied to their own axes.

    Args:
        data_weak (dict): batch with an ``'image'`` tensor and optionally
            ``'gt_bbox'`` (a sequence of per-image box tensors).
        data_strong (dict): batch with the same layout.

    Returns:
        tuple: ``(data_weak, data_strong)``, modified in place.
    """
    target_hw = [
        max(data_strong['image'].shape[2], data_weak['image'].shape[2]),
        max(data_strong['image'].shape[3], data_weak['image'].shape[3]),
    ]
    _resize_batch_to(data_strong, target_hw)
    _resize_batch_to(data_weak, target_hw)
    return data_weak, data_strong
class SimpleModelEMA(object):
    """
    Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive to where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model=None, decay=0.9996):
        """
        Args:
            model (nn.Layer): model to apply EMA to; a deep copy is kept as
                the averaged model.
            decay (float): EMA decay rate.
        """
        self.model = deepcopy(model)
        self.decay = decay
        # resume() assigns this; define it up front so the attribute exists
        # on instances that were never resumed.
        self.step = 0

    def update(self, model, decay=None):
        """Blend ``model``'s floating-point state into the averaged model.

        Args:
            model: source model whose ``state_dict()`` supplies new values.
            decay (float|None): weight kept from the current average;
                ``None`` uses ``self.decay``. ``decay=0`` copies the source.
        """
        if decay is None:
            decay = self.decay

        with paddle.no_grad():
            state = {}
            msd = model.state_dict()
            for k, v in self.model.state_dict().items():
                # Non-floating entries are carried over unchanged.
                if paddle.is_floating_point(v):
                    v *= decay
                    v += (1.0 - decay) * msd[k].detach()
                state[k] = v
            self.model.set_state_dict(state)

    def resume(self, state_dict, step=0):
        """Overwrite floating-point entries from ``state_dict`` and set ``step``.

        Args:
            state_dict (dict): saved state; must contain every
                floating-point key of the averaged model.
            step (int): value stored in ``self.step``.
        """
        state = {}
        msd = state_dict
        for k, v in self.model.state_dict().items():
            if paddle.is_floating_point(v):
                v = msd[k].detach()
            state[k] = v
        self.model.set_state_dict(state)
        self.step = step
optimizer=None, ema=None): "exists.".format(pdparam_path)) if ema is not None and os.path.exists(path + '.pdema'): - # Exchange model and ema_model to load - ema_state_dict = paddle.load(pdparam_path) - param_state_dict = paddle.load(path + '.pdema') + if exchange: + # Exchange model and ema_model to load + logger.info('Exchange model and ema_model to load:') + ema_state_dict = paddle.load(pdparam_path) + logger.info('Loading ema_model weights from {}'.format(path + + '.pdparams')) + param_state_dict = paddle.load(path + '.pdema') + logger.info('Loading model weights from {}'.format(path + '.pdema')) + else: + ema_state_dict = paddle.load(path + '.pdema') + logger.info('Loading ema_model weights from {}'.format(path + + '.pdema')) + param_state_dict = paddle.load(pdparam_path) + logger.info('Loading model weights from {}'.format(path + + '.pdparams')) else: ema_state_dict = None param_state_dict = paddle.load(pdparam_path) diff --git a/tools/train.py b/tools/train.py index 9c2f8aabf..6f0d2a6d3 100755 --- a/tools/train.py +++ b/tools/train.py @@ -31,6 +31,7 @@ import paddle from ppdet.core.workspace import load_config, merge_config from ppdet.engine import Trainer, init_parallel_env, set_random_seed, init_fleet_env +from ppdet.engine.trainer_ssod import Trainer_DenseTeacher from ppdet.slim import build_slim_model from ppdet.utils.cli import ArgsParser, merge_args @@ -124,8 +125,17 @@ def run(FLAGS, cfg): if FLAGS.enable_ce: set_random_seed(0) - # build trainer - trainer = Trainer(cfg, mode='train') + ssod_method = cfg.get('ssod_method', None) + if ssod_method is not None: + if ssod_method == 'DenseTeacher': + trainer = Trainer_DenseTeacher(cfg, mode='train') + else: + raise ValueError( + "Semi-Supervised Object Detection only support DenseTeacher now." + ) + else: + # build trainer + trainer = Trainer(cfg, mode='train') # load weights if FLAGS.resume is not None: -- GitLab