From 1e21400ed7a9a6d61f62d4ffc99c2823587dc5cf Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Tue, 7 Feb 2023 20:06:43 +0800 Subject: [PATCH] Add ppyoloe semi-det base codes (#7680) * add ppyoloe semi-det base codes * fix configs * fix head distill loss * add more semi_det configs and fix doc, test=document_fix * add contrast_loss config, test=document_fix --- configs/semi_det/baseline/README.md | 3 + .../ppyoloe_plus_crn_l_80e_coco_sup005.yml | 29 ++++ .../ppyoloe_plus_crn_l_80e_coco_sup010.yml | 29 ++++ configs/semi_det/denseteacher/README.md | 12 +- ...eacher_ppyoloe_plus_crn_l_coco_semi005.yml | 151 ++++++++++++++++++ ...eacher_ppyoloe_plus_crn_l_coco_semi010.yml | 151 ++++++++++++++++++ ...eacher_ppyoloe_plus_crn_s_coco_semi005.yml | 151 ++++++++++++++++++ ...eacher_ppyoloe_plus_crn_s_coco_semi010.yml | 151 ++++++++++++++++++ ppdet/data/transform/operators.py | 31 ++++ ppdet/engine/trainer_ssod.py | 17 +- ppdet/modeling/architectures/fcos.py | 11 +- ppdet/modeling/architectures/ppyoloe.py | 118 +++++++++++++- ppdet/modeling/heads/ppyoloe_head.py | 35 +++- ppdet/modeling/ssod_utils.py | 8 +- 14 files changed, 869 insertions(+), 28 deletions(-) create mode 100644 configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup005.yml create mode 100644 configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml create mode 100644 configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml create mode 100644 configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml create mode 100644 configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml create mode 100644 configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml diff --git a/configs/semi_det/baseline/README.md b/configs/semi_det/baseline/README.md index 58e7795e0..457ad7f7c 100644 --- a/configs/semi_det/baseline/README.md +++ b/configs/semi_det/baseline/README.md @@ -21,6 +21,9 @@ | PP-YOLOE+_s | 5% | 80 (7200) | 32.8 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup005.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup005.yml) | | PP-YOLOE+_s | 10% | 80 (14480) | 35.3 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco_sup010.pdparams) | [config](ppyoloe_plus_crn_s_80e_coco_sup010.yml) | | PP-YOLOE+_s | full | 80 (146560) | 43.7 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_coco.pdparams) | [config](../../ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml) | +| PP-YOLOE+_l | 5% | 80 (7200) | 42.9 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_sup005.pdparams) | [config](ppyoloe_plus_crn_l_80e_coco_sup005.yml) | +| PP-YOLOE+_l | 10% | 80 (14480) | 45.7 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco_sup010.pdparams) | [config](ppyoloe_plus_crn_l_80e_coco_sup010.yml) | +| PP-YOLOE+_l | full | 80 (146560) | 49.8 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | **注意:** - 以上模型训练默认使用8 GPUs,总batch_size默认为64,默认初始学习率为0.001。如果改动了总batch_size,请按线性比例相应地调整学习率。 diff --git a/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup005.yml b/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup005.yml new file mode 100644 index 000000000..4dd4a898e --- /dev/null +++ b/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup005.yml @@ -0,0 +1,29 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/ppyoloe_plus_crn_l_80e_coco_sup005/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 + + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@5.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class'] + + +epoch: 80 +LearningRate: + base_lr: 0.001 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. + epochs: 5 diff --git a/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml b/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml new file mode 100644 index 000000000..647252175 --- /dev/null +++ b/configs/semi_det/baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml @@ -0,0 +1,29 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/ppyoloe_plus_crn_l_80e_coco_sup010/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 + + +TrainDataset: + !COCODataSet + image_dir: train2017 + anno_path: semi_annotations/instances_train2017.1@10.json + dataset_dir: dataset/coco + data_fields: ['image', 'gt_bbox', 'gt_class'] + + +epoch: 80 +LearningRate: + base_lr: 0.001 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. + epochs: 5 diff --git a/configs/semi_det/denseteacher/README.md b/configs/semi_det/denseteacher/README.md index 76c3c2fdd..7c629cc7c 100644 --- a/configs/semi_det/denseteacher/README.md +++ b/configs/semi_det/denseteacher/README.md @@ -2,7 +2,7 @@ # Dense Teacher: Dense Pseudo-Labels for Semi-supervised Object Detection -## 模型库 +## FCOS模型库 | 模型 | 监督数据比例 | Sup Baseline | Sup Epochs (Iters) | Sup mAPval
0.5:0.95 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 | | :------------: | :---------: | :---------------------: | :---------------------: |:---------------------------: |:----------------------------: | :------------------: |:--------: |:----------: | @@ -34,6 +34,16 @@ ``` +## PPYOLOE+ 模型库 + +| 模型 | 监督数据比例 | Sup Baseline | Sup Epochs (Iters) | Sup mAPval
0.5:0.95 | Semi mAPval
0.5:0.95 | Semi Epochs (Iters) | 模型下载 | 配置文件 | +| :------------: | :---------: | :---------------------: | :---------------------: |:---------------------------: |:----------------------------: | :------------------: |:--------: |:----------: | +| DenseTeacher-PPYOLOE+_s | 5% | [sup_config](../baseline/ppyoloe_plus_crn_s_80e_coco_sup005.yml) | 80 (14480) | 32.8 | **34.0** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_s_coco_semi005.pdparams) | [config](./denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml) | +| DenseTeacher-PPYOLOE+_s | 10% | [sup_config](../baseline/ppyoloe_plus_crn_s_80e_coco_sup010.yml) | 80 (14480) | 35.3 | **37.5** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_s_coco_semi010.pdparams) | [config](./denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml) | +| DenseTeacher-PPYOLOE+_l | 5% | [sup_config](../baseline/ppyoloe_plus_crn_s_80e_coco_sup005.yml) | 80 (14480) | 42.9 | **45.4** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_l_coco_semi005.pdparams) | [config](./denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml) | +| DenseTeacher-PPYOLOE+_l | 10% | [sup_config](../baseline/ppyoloe_plus_crn_l_80e_coco_sup010.yml) | 80 (14480) | 45.7 | **47.4** | 200 (36200) | [download](https://paddledet.bj.bcebos.com/models/denseteacher_ppyoloe_plus_crn_l_coco_semi010.pdparams) | [config](./denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml) | + + ## 使用说明 仅训练时必须使用半监督检测的配置文件去训练,评估、预测、部署也可以按基础检测器的配置文件去执行。 diff --git a/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml new file mode 100644 index 000000000..25159a8c0 --- /dev/null +++ b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi005.yml @@ -0,0 +1,151 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml', + '../_base_/coco_detection_percent_5.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/denseteacher_ppyoloe_plus_crn_l_coco_semi005/model_final + +epochs: &epochs 200 +cosine_epochs: &cosine_epochs 240 + + +### pretrain and warmup config, choose one and comment another +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/semi_det/ppyoloe_plus_crn_l_80e_coco_sup005.pdparams # mAP=42.9 +semi_start_iters: 0 +ema_start_iters: 0 +use_warmup: &use_warmup False + +# pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams +# semi_start_iters: 5000 +# ema_start_iters: 3000 +# use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 1.0, distill_loss_iou: 2.5, distill_loss_dfl: 0., distill_loss_contrast: 0.1} + contrast_loss: + temperature: 0.2 + alpha: 0.9 + smooth_iter: 100 + concat_sup_data: True + suppress: linear + ratio: 0.01 + test_cfg: + inference_on: teacher + + +### reader config +batch_size: &batch_size 8 +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomFlip: {} + - RandomCrop: {} # unsup will be fake gt_boxes + weak_aug: + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + sup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + - PadGT: {} + unsup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + sup_batch_size: *batch_size + unsup_batch_size: *batch_size + shuffle: True + drop_last: True + collate_batch: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +### model config +architecture: PPYOLOE +norm_type: sync_bn +ema_black_list: ['proj_conv.weight'] +custom_black_list: ['reduce_mean'] +PPYOLOE: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +eval_size: ~ # means None, but not str 'None' +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 # + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 300 + score_threshold: 0.01 + nms_threshold: 0.7 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !CosineDecay + max_epochs: *cosine_epochs + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + epochs: 3 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 # dt-fcos 0.0001 + type: L2 + clip_grad_by_norm: 1.0 # dt-fcos clip_grad_by_value diff --git a/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml new file mode 100644 index 000000000..24aa642bf --- /dev/null +++ b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_l_coco_semi010.yml @@ -0,0 +1,151 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml', + '../_base_/coco_detection_percent_10.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/denseteacher_ppyoloe_plus_crn_l_coco_semi010/model_final + +epochs: &epochs 200 +cosine_epochs: &cosine_epochs 240 + + +### pretrain and warmup config, choose one and comment another +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/semi_det/ppyoloe_plus_crn_l_80e_coco_sup010.pdparams # mAP=45.7 +semi_start_iters: 0 +ema_start_iters: 0 +use_warmup: &use_warmup False + +# pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams +# semi_start_iters: 5000 +# ema_start_iters: 3000 +# use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 1.0, distill_loss_iou: 2.5, distill_loss_dfl: 0., distill_loss_contrast: 0.1} + contrast_loss: + temperature: 0.2 + alpha: 0.9 + smooth_iter: 100 + concat_sup_data: True + suppress: linear + ratio: 0.01 + test_cfg: + inference_on: teacher + + +### reader config +batch_size: &batch_size 8 +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomFlip: {} + - RandomCrop: {} # unsup will be fake gt_boxes + weak_aug: + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + sup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + - PadGT: {} + unsup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + sup_batch_size: *batch_size + unsup_batch_size: *batch_size + shuffle: True + drop_last: True + collate_batch: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +### model config +architecture: PPYOLOE +norm_type: sync_bn +ema_black_list: ['proj_conv.weight'] +custom_black_list: ['reduce_mean'] +PPYOLOE: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +eval_size: ~ # means None, but not str 'None' +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 # + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 300 + score_threshold: 0.01 + nms_threshold: 0.7 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !CosineDecay + max_epochs: *cosine_epochs + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + epochs: 3 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 # dt-fcos 0.0001 + type: L2 + clip_grad_by_norm: 1.0 # dt-fcos clip_grad_by_value diff --git a/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml new file mode 100644 index 000000000..86661a282 --- /dev/null +++ b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi005.yml @@ -0,0 +1,151 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml', + '../_base_/coco_detection_percent_5.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/denseteacher_ppyoloe_plus_crn_s_coco_semi005/model_final + +epochs: &epochs 200 +cosine_epochs: &cosine_epochs 240 + + +### pretrain and warmup config, choose one and comment another +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/semi_det/ppyoloe_plus_crn_s_80e_coco_sup005.pdparams # mAP=32.8 +semi_start_iters: 0 +ema_start_iters: 0 +use_warmup: &use_warmup False + +# pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams +# semi_start_iters: 5000 +# ema_start_iters: 3000 +# use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 1.0, distill_loss_iou: 2.5, distill_loss_dfl: 0., distill_loss_contrast: 0.1} + contrast_loss: + temperature: 0.2 + alpha: 0.9 + smooth_iter: 100 + concat_sup_data: True + suppress: linear + ratio: 0.01 + test_cfg: + inference_on: teacher + + +### reader config +batch_size: &batch_size 8 +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomFlip: {} + - RandomCrop: {} # unsup will be fake gt_boxes + weak_aug: + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + sup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + - PadGT: {} + unsup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + sup_batch_size: *batch_size + unsup_batch_size: *batch_size + shuffle: True + drop_last: True + collate_batch: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +### model config +architecture: PPYOLOE +norm_type: sync_bn +ema_black_list: ['proj_conv.weight'] +custom_black_list: ['reduce_mean'] +PPYOLOE: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +eval_size: ~ # means None, but not str 'None' +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 # + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 300 + score_threshold: 0.01 + nms_threshold: 0.7 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !CosineDecay + max_epochs: *cosine_epochs + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + epochs: 3 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 # dt-fcos 0.0001 + type: L2 + clip_grad_by_norm: 1.0 # dt-fcos clip_grad_by_value diff --git a/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml new file mode 100644 index 000000000..5855523e0 --- /dev/null +++ b/configs/semi_det/denseteacher/denseteacher_ppyoloe_plus_crn_s_coco_semi010.yml @@ -0,0 +1,151 @@ +_BASE_: [ + '../../ppyoloe/ppyoloe_plus_crn_s_80e_coco.yml', + '../_base_/coco_detection_percent_10.yml', +] +log_iter: 50 +snapshot_epoch: 5 +weights: output/denseteacher_ppyoloe_plus_crn_s_coco_semi010/model_final + +epochs: &epochs 200 +cosine_epochs: &cosine_epochs 240 + + +### pretrain and warmup config, choose one and comment another +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/semi_det/ppyoloe_plus_crn_s_80e_coco_sup010.pdparams # mAP=35.3 +semi_start_iters: 0 +ema_start_iters: 0 +use_warmup: &use_warmup False + +# pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams +# semi_start_iters: 5000 +# ema_start_iters: 3000 +# use_warmup: &use_warmup True + + +### global config +use_simple_ema: True +ema_decay: 0.9996 +ssod_method: DenseTeacher +DenseTeacher: + train_cfg: + sup_weight: 1.0 + unsup_weight: 1.0 + loss_weight: {distill_loss_cls: 1.0, distill_loss_iou: 2.5, distill_loss_dfl: 0., distill_loss_contrast: 0.1} + contrast_loss: + temperature: 0.2 + alpha: 0.9 + smooth_iter: 100 + concat_sup_data: True + suppress: linear + ratio: 0.01 + test_cfg: + inference_on: teacher + + +### reader config +batch_size: &batch_size 8 +worker_num: 2 +SemiTrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomFlip: {} + - RandomCrop: {} # unsup will be fake gt_boxes + weak_aug: + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + strong_aug: + - StrongAugImage: {transforms: [ + RandomColorJitter: {prob: 0.8, brightness: 0.4, contrast: 0.4, saturation: 0.4, hue: 0.1}, + RandomErasingCrop: {}, + RandomGaussianBlur: {prob: 0.5, sigma: [0.1, 2.0]}, + RandomGrayscale: {prob: 0.2}, + ]} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], is_scale: true, norm_type: none} + sup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + - PadGT: {} + unsup_batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - Permute: {} + sup_batch_size: *batch_size + unsup_batch_size: *batch_size + shuffle: True + drop_last: True + collate_batch: True + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +### model config +architecture: PPYOLOE +norm_type: sync_bn +ema_black_list: ['proj_conv.weight'] +custom_black_list: ['reduce_mean'] +PPYOLOE: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +eval_size: ~ # means None, but not str 'None' +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 # + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 1000 + keep_top_k: 300 + score_threshold: 0.01 + nms_threshold: 0.7 + + +### other config +epoch: *epochs +LearningRate: + base_lr: 0.01 + schedulers: + - !CosineDecay + max_epochs: *cosine_epochs + use_warmup: *use_warmup + - !LinearWarmup + start_factor: 0.001 + epochs: 3 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 # dt-fcos 0.0001 + type: L2 + clip_grad_by_norm: 1.0 # dt-fcos clip_grad_by_value diff --git a/ppdet/data/transform/operators.py b/ppdet/data/transform/operators.py index 3a68282de..9b390f018 100644 --- a/ppdet/data/transform/operators.py +++ b/ppdet/data/transform/operators.py @@ -1420,10 +1420,38 @@ class RandomCrop(BaseOperator): crop_segms.append(_crop_rle(segm, crop, height, width)) return crop_segms + def set_fake_bboxes(self, sample): + sample['gt_bbox'] = np.array( + [ + [32, 32, 128, 128], + [32, 32, 128, 256], + [32, 64, 128, 128], + [32, 64, 128, 256], + [64, 64, 128, 256], + [64, 64, 256, 256], + [64, 32, 128, 256], + [64, 32, 128, 256], + [96, 32, 128, 256], + [96, 32, 128, 256], + ], + dtype=np.float32) + sample['gt_class'] = np.array( + [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], np.int32) + return sample + def apply(self, sample, context=None): + if 'gt_bbox' not in sample: + # only used in semi-det as unsup data + sample = self.set_fake_bboxes(sample) + sample = self.random_crop(sample, fake_bboxes=True) + return sample + if 'gt_bbox' in sample and len(sample['gt_bbox']) == 0: return sample + sample = self.random_crop(sample) + return sample + def random_crop(self, sample, fake_bboxes=False): h, w = sample['image'].shape[:2] gt_bbox = sample['gt_bbox'] @@ -1515,6 +1543,9 @@ class RandomCrop(BaseOperator): sample['gt_segm'], valid_ids, axis=0) sample['image'] = self._crop_image(sample['image'], crop_box) + if fake_bboxes == True: + return sample + sample['gt_bbox'] = np.take(cropped_box, valid_ids, axis=0) sample['gt_class'] = np.take( sample['gt_class'], valid_ids, axis=0) diff --git a/ppdet/engine/trainer_ssod.py b/ppdet/engine/trainer_ssod.py index 891a1eaf4..90b8a9f7f 100644 --- a/ppdet/engine/trainer_ssod.py +++ b/ppdet/engine/trainer_ssod.py @@ -16,12 +16,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os -import sys import copy import time import typing -import math import numpy as np import paddle @@ -317,16 +314,14 @@ class Trainer_DenseTeacher(Trainer): data_unsup_w['is_teacher'] = True teacher_preds = self.ema.model(data_unsup_w) + train_cfg['curr_iter'] = curr_iter + train_cfg['st_iter'] = st_iter if self._nranks > 1: - loss_dict_unsup = self.model._layers.get_distill_loss( - student_preds, - teacher_preds, - ratio=train_cfg['ratio']) + loss_dict_unsup = self.model._layers.get_ssod_distill_loss( + student_preds, teacher_preds, train_cfg) else: - loss_dict_unsup = self.model.get_distill_loss( - student_preds, - teacher_preds, - ratio=train_cfg['ratio']) + loss_dict_unsup = self.model.get_ssod_distill_loss( + student_preds, teacher_preds, train_cfg) fg_num = loss_dict_unsup["fg_sum"] del loss_dict_unsup["fg_sum"] diff --git a/ppdet/modeling/architectures/fcos.py b/ppdet/modeling/architectures/fcos.py index 615761ecf..4a892c836 100644 --- a/ppdet/modeling/architectures/fcos.py +++ b/ppdet/modeling/architectures/fcos.py @@ -85,12 +85,10 @@ class FCOS(BaseArch): def get_loss_keys(self): return ['loss_cls', 'loss_box', 'loss_quality'] - def get_distill_loss(self, - fcos_head_outs, - teacher_fcos_head_outs, - ratio=0.01): - student_logits, student_deltas, student_quality = fcos_head_outs - teacher_logits, teacher_deltas, teacher_quality = teacher_fcos_head_outs + def get_ssod_distill_loss(self, student_head_outs, teacher_head_outs, + train_cfg): + student_logits, student_deltas, student_quality = student_head_outs + teacher_logits, teacher_deltas, teacher_quality = teacher_head_outs nc = student_logits[0].shape[1] student_logits = paddle.concat( @@ -132,6 +130,7 @@ class FCOS(BaseArch): ], axis=0) + ratio = train_cfg.get('ratio', 0.01) with paddle.no_grad(): # Region Selection count_num = int(teacher_logits.shape[0] * ratio) diff --git a/ppdet/modeling/architectures/ppyoloe.py b/ppdet/modeling/architectures/ppyoloe.py index e646c30db..96b556aea 100644 --- a/ppdet/modeling/architectures/ppyoloe.py +++ b/ppdet/modeling/architectures/ppyoloe.py @@ -16,10 +16,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import paddle import copy + +import paddle +import paddle.nn.functional as F from ppdet.core.workspace import register, create from .meta_arch import BaseArch +from ..ssod_utils import QFLv2 +from ..losses import GIoULoss __all__ = ['PPYOLOE', 'PPYOLOEWithAuxHead'] # PP-YOLOE and PP-YOLOE+ are recommended to use this architecture, especially when use distillation or aux head @@ -57,6 +61,11 @@ class PPYOLOE(BaseArch): self.yolo_head = yolo_head self.post_process = post_process self.for_mot = for_mot + + # semi-det + self.is_teacher = False + + # distill self.for_distill = for_distill self.feat_distill_place = feat_distill_place if for_distill: @@ -85,7 +94,8 @@ class PPYOLOE(BaseArch): body_feats = self.backbone(self.inputs) neck_feats = self.neck(body_feats, self.for_mot) - if self.training: + self.is_teacher = self.inputs.get('is_teacher', False) # for semi-det + if self.training or self.is_teacher: yolo_losses = self.yolo_head(neck_feats, self.inputs) if self.for_distill: @@ -121,6 +131,110 @@ class PPYOLOE(BaseArch): def get_pred(self): return self._forward() + def get_loss_keys(self): + return ['loss_cls', 'loss_iou', 'loss_dfl', 'loss_contrast'] + + def get_ssod_distill_loss(self, student_head_outs, teacher_head_outs, + train_cfg): + # for semi-det distill + # student_probs: already sigmoid + student_probs, student_deltas, student_dfl = student_head_outs + teacher_probs, teacher_deltas, teacher_dfl = teacher_head_outs + bs, l, nc = student_probs.shape[:] + student_probs = student_probs.reshape([-1, nc]) + teacher_probs = teacher_probs.reshape([-1, nc]) + student_deltas = student_deltas.reshape([-1, 4]) + teacher_deltas = teacher_deltas.reshape([-1, 4]) + student_dfl = student_dfl.reshape([-1, 4, self.yolo_head.reg_channels]) + teacher_dfl = teacher_dfl.reshape([-1, 4, self.yolo_head.reg_channels]) + + ratio = train_cfg.get('ratio', 0.01) + + # for contrast loss + curr_iter = train_cfg['curr_iter'] + st_iter = train_cfg['st_iter'] + if curr_iter == st_iter + 1: + # start semi-det training + self.queue_ptr = 0 + self.queue_size = int(bs * l * ratio) + self.queue_feats = paddle.zeros([self.queue_size, nc]) + self.queue_probs = paddle.zeros([self.queue_size, nc]) + contrast_loss_cfg = train_cfg['contrast_loss'] + temperature = contrast_loss_cfg.get('temperature', 0.2) + alpha = contrast_loss_cfg.get('alpha', 0.9) + smooth_iter = contrast_loss_cfg.get('smooth_iter', 100) + st_iter + + with paddle.no_grad(): + # Region Selection + count_num = int(teacher_probs.shape[0] * ratio) + max_vals = paddle.max(teacher_probs, 1) + sorted_vals, sorted_inds = paddle.topk(max_vals, + teacher_probs.shape[0]) + mask = paddle.zeros_like(max_vals) + mask[sorted_inds[:count_num]] = 1. + fg_num = sorted_vals[:count_num].sum() + b_mask = mask > 0. + + # for contrast loss + probs = teacher_probs[b_mask].detach() + if curr_iter > smooth_iter: # memory-smoothing + A = paddle.exp( + paddle.mm(teacher_probs[b_mask], self.queue_probs.t()) / + temperature) + A = A / A.sum(1, keepdim=True) + probs = alpha * probs + (1 - alpha) * paddle.mm( + A, self.queue_probs) + n = student_probs[b_mask].shape[0] + # update memory bank + self.queue_feats[self.queue_ptr:self.queue_ptr + + n, :] = teacher_probs[b_mask].detach() + self.queue_probs[self.queue_ptr:self.queue_ptr + + n, :] = teacher_probs[b_mask].detach() + self.queue_ptr = (self.queue_ptr + n) % self.queue_size + + # embedding similarity + sim = paddle.exp( + paddle.mm(student_probs[b_mask], teacher_probs[b_mask].t()) / 0.2) + sim_probs = sim / sim.sum(1, keepdim=True) + # pseudo-label graph with self-loop + Q = paddle.mm(probs, probs.t()) + Q.fill_diagonal_(1) + pos_mask = (Q >= 0.5).astype('float32') + Q = Q * pos_mask + Q = Q / Q.sum(1, keepdim=True) + # contrastive loss + loss_contrast = -(paddle.log(sim_probs + 1e-7) * Q).sum(1) + loss_contrast = loss_contrast.mean() + + # distill_loss_cls + loss_cls = QFLv2( + student_probs, teacher_probs, weight=mask, reduction="sum") / fg_num + + # distill_loss_iou + inputs = paddle.concat( + (-student_deltas[b_mask][..., :2], student_deltas[b_mask][..., 2:]), + -1) + targets = paddle.concat( + (-teacher_deltas[b_mask][..., :2], teacher_deltas[b_mask][..., 2:]), + -1) + iou_loss = GIoULoss(reduction='mean') + loss_iou = iou_loss(inputs, targets) + + # distill_loss_dfl + loss_dfl = F.cross_entropy( + student_dfl[b_mask].reshape([-1, self.yolo_head.reg_channels]), + teacher_dfl[b_mask].reshape([-1, self.yolo_head.reg_channels]), + soft_label=True, + reduction='mean') + + return { + "distill_loss_cls": loss_cls, + "distill_loss_iou": loss_iou, + "distill_loss_dfl": loss_dfl, + "distill_loss_contrast": loss_contrast, + "fg_sum": fg_num, + } + @register class PPYOLOEWithAuxHead(BaseArch): diff --git a/ppdet/modeling/heads/ppyoloe_head.py b/ppdet/modeling/heads/ppyoloe_head.py index 38d4d5415..1eb735194 100644 --- a/ppdet/modeling/heads/ppyoloe_head.py +++ b/ppdet/modeling/heads/ppyoloe_head.py @@ -112,6 +112,7 @@ class PPYOLOEHead(nn.Layer): self.exclude_post_process = exclude_post_process self.use_shared_conv = use_shared_conv self.for_distill = for_distill + self.is_teacher = False # stem self.stem_cls = nn.LayerList() @@ -181,6 +182,14 @@ class PPYOLOEHead(nn.Layer): cls_score_list = paddle.concat(cls_score_list, axis=1) reg_distri_list = paddle.concat(reg_distri_list, axis=1) + if targets.get('is_teacher', False): + pred_deltas, pred_dfls = self._bbox_decode_fake(reg_distri_list) + return cls_score_list, pred_deltas * stride_tensor, pred_dfls + + if targets.get('get_data', False): + pred_deltas, pred_dfls = self._bbox_decode_fake(reg_distri_list) + return cls_score_list, pred_deltas * stride_tensor, pred_dfls + return self.get_loss([ cls_score_list, reg_distri_list, anchors, anchor_points, num_anchors_list, stride_tensor @@ -249,6 +258,14 @@ class PPYOLOEHead(nn.Layer): if self.training: return self.forward_train(feats, targets, aux_pred) else: + if targets is not None: + # only for semi-det + self.is_teacher = targets.get('is_teacher', False) + if self.is_teacher: + return self.forward_train(feats, targets, aux_pred=None) + else: + return self.forward_eval(feats) + return self.forward_eval(feats) @staticmethod @@ -274,6 +291,14 @@ class PPYOLOEHead(nn.Layer): pred_dist = self.proj_conv(pred_dist.transpose([0, 3, 1, 2])).squeeze(1) return batch_distance2bbox(anchor_points, pred_dist) + def _bbox_decode_fake(self, pred_dist): + _, l, _ = get_static_shape(pred_dist) + pred_dist_dfl = F.softmax( + pred_dist.reshape([-1, l, 4, self.reg_channels])) + pred_dist = self.proj_conv(pred_dist_dfl.transpose([0, 3, 1, 2 + ])).squeeze(1) + return pred_dist, pred_dist_dfl + def _bbox2distance(self, points, bbox): x1y1, x2y2 = paddle.split(bbox, 2, -1) lt = points - x1y1 @@ -388,11 +413,13 @@ class PPYOLOEHead(nn.Layer): gt_bboxes, pad_gt_mask, bg_index=self.num_classes) - self.assigned_labels = assigned_labels - self.assigned_bboxes = assigned_bboxes - self.assigned_scores = assigned_scores - self.mask_positive = mask_positive + if self.for_distill: + self.assigned_labels = assigned_labels + self.assigned_bboxes = assigned_bboxes + self.assigned_scores = assigned_scores + self.mask_positive = mask_positive else: + # only used in distill assigned_labels = self.assigned_labels assigned_bboxes = self.assigned_bboxes assigned_scores = self.assigned_scores diff --git a/ppdet/modeling/ssod_utils.py b/ppdet/modeling/ssod_utils.py index a0c0a95b1..3f29ef3f4 100644 --- a/ppdet/modeling/ssod_utils.py +++ b/ppdet/modeling/ssod_utils.py @@ -35,12 +35,12 @@ def align_weak_strong_shape(data_weak, data_strong): mode='bilinear', align_corners=False) if 'gt_bbox' in data_strong: - gt_bboxes = data_strong['gt_bbox'] + gt_bboxes = data_strong['gt_bbox'].numpy() for i in range(len(gt_bboxes)): if len(gt_bboxes[i]) > 0: gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x_s gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y_s - data_strong['gt_bbox'] = gt_bboxes + data_strong['gt_bbox'] = paddle.to_tensor(gt_bboxes) if scale_x_w != 1 or scale_y_w != 1: data_weak['image'] = F.interpolate( @@ -49,12 +49,12 @@ def align_weak_strong_shape(data_weak, data_strong): mode='bilinear', align_corners=False) if 'gt_bbox' in data_weak: - gt_bboxes = data_weak['gt_bbox'] + gt_bboxes = data_weak['gt_bbox'].numpy() for i in range(len(gt_bboxes)): if len(gt_bboxes[i]) > 0: gt_bboxes[i][:, 0::2] = gt_bboxes[i][:, 0::2] * scale_x_w gt_bboxes[i][:, 1::2] = gt_bboxes[i][:, 1::2] * scale_y_w - data_weak['gt_bbox'] = gt_bboxes + data_weak['gt_bbox'] = paddle.to_tensor(gt_bboxes) return data_weak, data_strong -- GitLab