From ca227ee40b3be0a7d3c3de52d7c82e1c546bd27c Mon Sep 17 00:00:00 2001 From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com> Date: Thu, 8 Dec 2022 16:44:17 +0800 Subject: [PATCH] add sku110k models (#7465) * add sku110k models * add link in ppyoloe doc * modify directory of sku110k * fix data config problem --- configs/ppyoloe/README.md | 2 + configs/ppyoloe/README_cn.md | 2 + configs/ppyoloe/application/README.md | 21 +++ .../ppyoloe/application/_base_/sku110k.yml | 21 +++ .../ppyoloe_plus_crn_l_80e_sku110k.yml | 127 ++++++++++++++++++ .../ppyoloe_plus_crn_m_80e_sku110k.yml | 127 ++++++++++++++++++ .../ppyoloe_plus_crn_s_80e_sku110k.yml | 127 ++++++++++++++++++ .../ppyoloe_plus_crn_x_80e_sku110k.yml | 127 ++++++++++++++++++ 8 files changed, 554 insertions(+) create mode 100644 configs/ppyoloe/application/_base_/sku110k.yml create mode 100644 configs/ppyoloe/application/ppyoloe_plus_crn_l_80e_sku110k.yml create mode 100644 configs/ppyoloe/application/ppyoloe_plus_crn_m_80e_sku110k.yml create mode 100644 configs/ppyoloe/application/ppyoloe_plus_crn_s_80e_sku110k.yml create mode 100644 configs/ppyoloe/application/ppyoloe_plus_crn_x_80e_sku110k.yml diff --git a/configs/ppyoloe/README.md b/configs/ppyoloe/README.md index 6c38907d3..6f1c7f976 100644 --- a/configs/ppyoloe/README.md +++ b/configs/ppyoloe/README.md @@ -89,6 +89,8 @@ The PaddleDetection team provides configs and weights of various feature detecti |Pedestrian Detection | CrowdHuman | [pphuman](../pphuman) | |Vehicle Detection | BDD100K, UA-DETRAC | [ppvehicle](../ppvehicle) | |Small Object Detection | VisDrone、DOTA、xView | [smalldet](../smalldet) | +|Densely Packed Object Detection | SKU110k | [application](./application) | +|Rotated Object Detection | DOTA | [PP-YOLOE-R](../rotate/ppyoloe_r/) | ## Getting Start diff --git a/configs/ppyoloe/README_cn.md b/configs/ppyoloe/README_cn.md index 96701dc15..a90b6b0bf 100644 --- a/configs/ppyoloe/README_cn.md +++ b/configs/ppyoloe/README_cn.md @@ -88,6 
+88,8 @@ PaddleDetection团队提供了基于PP-YOLOE的各种垂类检测模型的配置 | 行人检测 | CrowdHuman | [pphuman](../pphuman) | | 车辆检测 | BDD100K、UA-DETRAC | [ppvehicle](../ppvehicle) | | 小目标检测 | VisDrone、DOTA、xView | [smalldet](../smalldet) | +| 密集目标检测 | SKU110k | [application](./application) | +| 旋转框检测 | DOTA | [PP-YOLOE-R](../rotate/ppyoloe_r/) | ## 使用说明 diff --git a/configs/ppyoloe/application/README.md b/configs/ppyoloe/application/README.md index 5dd8283fa..41bf34f5b 100644 --- a/configs/ppyoloe/application/README.md +++ b/configs/ppyoloe/application/README.md @@ -11,6 +11,8 @@ 工业数据集使用[PKU-Market-PCB](https://robotics.pkusz.edu.cn/resources/dataset/),该数据集用于印刷电路板(PCB)的瑕疵检测,提供了6种常见的PCB缺陷, 处理后的COCO格式,包含图片训练集555张,测试集138张,6个类别,[PKU-Market-PCB COCO格式下载](https://bj.bcebos.com/v1/paddledet/data/PCB_coco.zip)。 +商超数据集[SKU110k](https://github.com/eg4000/SKU110K_CVPR19)是商品超市场景下的密集目标检测数据集,包含11,762张图片和超过170万个实例。其中包括8,233张用于训练的图像、588张用于验证的图像和2,941张用于测试的图像。 + ## 实验结果: @@ -31,11 +33,30 @@ - 具体使用教程请参考[ppyoloe](../ppyoloe#getting-start)。 +## SKU110k Model ZOO +| Model | Epoch | GPU number | images/GPU | backbone | input shape | Box AP<sup>val</sup><br>0.5:0.95 (maxDets=300) | Box AP<sup>test</sup><br>0.5:0.95 (maxDets=300) | download | config | +|:--------------:|:-----:|:-------:|:----------:|:----------:| :-------:|:-------------------------:|:---------------------------:|:---------:|:------:| +| PP-YOLOE+_s | 80 | 8 | 8 | cspresnet-s | 960 | 57.4 | 58.8 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_s_80e_sku110k.pdparams) | [config](./ppyoloe_plus_crn_s_80e_sku110k.yml) | +| PP-YOLOE+_m | 80 | 8 | 8 | cspresnet-m | 960 | 58.2 | 59.7 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_sku110k.pdparams) | [config](./ppyoloe_plus_crn_m_80e_sku110k.yml) | +| PP-YOLOE+_l | 80 | 8 | 4 | cspresnet-l | 960 | 58.8 | 60.2 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_sku110k.pdparams) | [config](./ppyoloe_plus_crn_l_80e_sku110k.yml) | +| PP-YOLOE+_x | 80 | 8 | 4 | cspresnet-x | 960 | 59.0 | 60.3 | [download](https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_sku110k.pdparams) | [config](./ppyoloe_plus_crn_x_80e_sku110k.yml) | + +**注意:** +- SKU110k系列模型训练过程中使用8 GPUs进行训练,如果**GPU卡数**或者**batch size**发生了改变,你需要按照公式 **lr<sub>new</sub> = lr<sub>default</sub> * (batch_size<sub>new</sub> * GPU_number<sub>new</sub>) / (batch_size<sub>default</sub> * GPU_number<sub>default</sub>)** 调整学习率。 +- SKU110k数据集使用**maxDets=300**的mAP值作为评估指标。 +- 具体使用教程请参考[ppyoloe](../ppyoloe#getting-start)。 ## 引用 ``` +@inproceedings{goldman2019dense, + author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner}, + title = {Precise Detection in Densely Packed Scenes}, + booktitle = {Proc. Conf. Comput. 
Vision Pattern Recognition (CVPR)}, + year = {2019} +} + @article{Exdark, title={Getting to Know Low-light Images with The Exclusively Dark Dataset}, author={Loh, Yuen Peng and Chan, Chee Seng}, diff --git a/configs/ppyoloe/application/_base_/sku110k.yml b/configs/ppyoloe/application/_base_/sku110k.yml new file mode 100644 index 000000000..664ce2f25 --- /dev/null +++ b/configs/ppyoloe/application/_base_/sku110k.yml @@ -0,0 +1,21 @@ +metric: COCO +num_classes: 1 + +TrainDataset: + !COCODataSet + image_dir: images + anno_path: annotations/annotations_train.json + dataset_dir: dataset/SKU110K_fixed + data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] + +EvalDataset: + !COCODataSet + image_dir: images + anno_path: annotations/annotations_val.json + dataset_dir: dataset/SKU110K_fixed + allow_empty: true + +TestDataset: + !ImageFolder + anno_path: annotations/annotations_test.json + dataset_dir: dataset/SKU110K_fixed diff --git a/configs/ppyoloe/application/ppyoloe_plus_crn_l_80e_sku110k.yml b/configs/ppyoloe/application/ppyoloe_plus_crn_l_80e_sku110k.yml new file mode 100644 index 000000000..858bf5f4a --- /dev/null +++ b/configs/ppyoloe/application/ppyoloe_plus_crn_l_80e_sku110k.yml @@ -0,0 +1,127 @@ +_BASE_: [ + './_base_/sku110k.yml', + '../../runtime.yml' +] + +log_iter: 10 +snapshot_epoch: 20 +weights: output/ppyoloe_plus_crn_l_80e_sku110k/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams +depth_mult: 1.0 +width_mult: 1.0 + + +# arch +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 +custom_black_list: ['reduce_mean'] + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + use_alpha: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + 
use_alpha: True + +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 3000 + keep_top_k: 1000 + score_threshold: 0.01 + nms_threshold: 0.7 + + +# reader +worker_num: 8 +eval_height: &eval_height 960 +eval_width: &eval_width 960 +eval_size: &eval_size [*eval_height, *eval_width] + +TrainReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [3000, 1800], keep_ratio: True, interp: 2} + - RandomDistort: {} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 4 + shuffle: true + drop_last: true + use_shared_memory: true + collate_batch: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +# optimizer +epoch: 80 + +LearningRate: + base_lr: 0.002 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. 
+ epochs: 5 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyoloe/application/ppyoloe_plus_crn_m_80e_sku110k.yml b/configs/ppyoloe/application/ppyoloe_plus_crn_m_80e_sku110k.yml new file mode 100644 index 000000000..cd7a4431c --- /dev/null +++ b/configs/ppyoloe/application/ppyoloe_plus_crn_m_80e_sku110k.yml @@ -0,0 +1,127 @@ +_BASE_: [ + './_base_/sku110k.yml', + '../../runtime.yml' +] + +log_iter: 10 +snapshot_epoch: 20 +weights: output/ppyoloe_plus_crn_m_80e_sku110k/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_m_obj365_pretrained.pdparams +depth_mult: 0.67 +width_mult: 0.75 + + +# arch +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 +custom_black_list: ['reduce_mean'] + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + use_alpha: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + use_alpha: True + +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 3000 + keep_top_k: 1000 + score_threshold: 0.01 + nms_threshold: 0.7 + + +# reader +worker_num: 8 +eval_height: &eval_height 960 +eval_width: &eval_width 960 +eval_size: &eval_size [*eval_height, *eval_width] + +TrainReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [3000, 1800], keep_ratio: True, interp: 2} + - RandomDistort: {} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: 
{target_size: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: true + drop_last: true + use_shared_memory: true + collate_batch: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +# optimizer +epoch: 80 + +LearningRate: + base_lr: 0.004 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. 
+ epochs: 5 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyoloe/application/ppyoloe_plus_crn_s_80e_sku110k.yml b/configs/ppyoloe/application/ppyoloe_plus_crn_s_80e_sku110k.yml new file mode 100644 index 000000000..e196a6845 --- /dev/null +++ b/configs/ppyoloe/application/ppyoloe_plus_crn_s_80e_sku110k.yml @@ -0,0 +1,127 @@ +_BASE_: [ + './_base_/sku110k.yml', + '../../runtime.yml' +] + +log_iter: 10 +snapshot_epoch: 20 +weights: output/ppyoloe_plus_crn_s_80e_sku110k/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams +depth_mult: 0.33 +width_mult: 0.50 + + +# arch +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 +custom_black_list: ['reduce_mean'] + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + use_alpha: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + use_alpha: True + +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 3000 + keep_top_k: 1000 + score_threshold: 0.01 + nms_threshold: 0.7 + + +# reader +worker_num: 8 +eval_height: &eval_height 960 +eval_width: &eval_width 960 +eval_size: &eval_size [*eval_height, *eval_width] + +TrainReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [3000, 1800], keep_ratio: True, interp: 2} + - RandomDistort: {} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: 
{target_size: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: true + drop_last: true + use_shared_memory: true + collate_batch: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +# optimizer +epoch: 80 + +LearningRate: + base_lr: 0.004 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. 
+ epochs: 5 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyoloe/application/ppyoloe_plus_crn_x_80e_sku110k.yml b/configs/ppyoloe/application/ppyoloe_plus_crn_x_80e_sku110k.yml new file mode 100644 index 000000000..da465662c --- /dev/null +++ b/configs/ppyoloe/application/ppyoloe_plus_crn_x_80e_sku110k.yml @@ -0,0 +1,127 @@ +_BASE_: [ + './_base_/sku110k.yml', + '../../runtime.yml' +] + +log_iter: 10 +snapshot_epoch: 20 +weights: output/ppyoloe_plus_crn_x_80e_sku110k/model_final + +pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_x_obj365_pretrained.pdparams +depth_mult: 1.33 +width_mult: 1.25 + + +# arch +architecture: YOLOv3 +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 +custom_black_list: ['reduce_mean'] + +YOLOv3: + backbone: CSPResNet + neck: CustomCSPPAN + yolo_head: PPYOLOEHead + post_process: ~ + +CSPResNet: + layers: [3, 6, 6, 3] + channels: [64, 128, 256, 512, 1024] + return_idx: [1, 2, 3] + use_large_stem: True + use_alpha: True + +CustomCSPPAN: + out_channels: [768, 384, 192] + stage_num: 1 + block_num: 3 + act: 'swish' + spp: true + use_alpha: True + +PPYOLOEHead: + fpn_strides: [32, 16, 8] + grid_cell_scale: 5.0 + grid_cell_offset: 0.5 + static_assigner_epoch: -1 + use_varifocal_loss: True + loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5} + static_assigner: + name: ATSSAssigner + topk: 9 + assigner: + name: TaskAlignedAssigner + topk: 13 + alpha: 1.0 + beta: 6.0 + nms: + name: MultiClassNMS + nms_top_k: 3000 + keep_top_k: 1000 + score_threshold: 0.01 + nms_threshold: 0.7 + + +# reader +worker_num: 8 +eval_height: &eval_height 960 +eval_width: &eval_width 960 +eval_size: &eval_size [*eval_height, *eval_width] + +TrainReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [3000, 1800], keep_ratio: True, interp: 2} + - RandomDistort: {} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: 
{target_size: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 4 + shuffle: true + drop_last: true + use_shared_memory: true + collate_batch: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 2 + +TestReader: + inputs_def: + image_shape: [3, *eval_height, *eval_width] + sample_transforms: + - Decode: {} + - Resize: {target_size: *eval_size, keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + batch_size: 1 + + +# optimizer +epoch: 80 + +LearningRate: + base_lr: 0.002 + schedulers: + - !CosineDecay + max_epochs: 96 + - !LinearWarmup + start_factor: 0. + epochs: 5 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 -- GitLab