From a496c2ddf116c24b41bc71ad276bac31e7b9245f Mon Sep 17 00:00:00 2001 From: Feng Ni Date: Tue, 7 Feb 2023 19:20:25 +0800 Subject: [PATCH] Add ppyoloe distillation modelzoo (#7694) * fix tal distill and single scale training * add modelzoo fix configs * fix docs typos, test=document_fix --- configs/ppyoloe/distill/README.md | 13 +++- .../ppyoloe_plus_crn_l_80e_coco_distill.yml | 22 +++++- .../ppyoloe_plus_crn_m_80e_coco_distill.yml | 22 +++++- .../ppyoloe_plus_crn_s_80e_coco_distill.yml | 22 +++++- configs/slim/distill/README.md | 68 ++++++++++++------- ...l => ppyoloe_plus_distill_l_distill_m.yml} | 23 ++++++- ...l => ppyoloe_plus_distill_m_distill_s.yml} | 25 +++++-- ...l => ppyoloe_plus_distill_x_distill_l.yml} | 23 ++++++- ppdet/slim/distill_model.py | 4 +- 9 files changed, 181 insertions(+), 41 deletions(-) rename configs/slim/distill/{ppyoloe_plus_distill_l_to_m.yml => ppyoloe_plus_distill_l_distill_m.yml} (62%) rename configs/slim/distill/{ppyoloe_plus_distill_m_to_s.yml => ppyoloe_plus_distill_m_distill_s.yml} (59%) rename configs/slim/distill/{ppyoloe_plus_distill_x_to_l.yml => ppyoloe_plus_distill_x_distill_l.yml} (62%) diff --git a/configs/ppyoloe/distill/README.md b/configs/ppyoloe/distill/README.md index 7453cdc2d..adc91e40b 100644 --- a/configs/ppyoloe/distill/README.md +++ b/configs/ppyoloe/distill/README.md @@ -2,9 +2,16 @@ PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。 - ## 模型库 +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: | +| PP-YOLOE+_x | teacher | 640 | 80e | 54.7 | [config](../ppyoloe_plus_crn_x_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_x_80e_coco.pdparams) | +| PP-YOLOE+_l | student | 640 | 80e | 52.9 | [config](../ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | +| PP-YOLOE+_l | distill | 640 | 80e | 
53.9(+1.0) | [config](./ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) | +| PP-YOLOE+_l | teacher | 640 | 80e | 52.9 | [config](../ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | +| PP-YOLOE+_m | student | 640 | 80e | 49.8 | [config](../ppyoloe_plus_crn_m_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco.pdparams) | +| PP-YOLOE+_m | distill | 640 | 80e | 50.7(+0.9) | [config](./ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](../../slim/distill/ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) | ## 快速开始 @@ -12,9 +19,9 @@ PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logit ### 训练 ```shell # 单卡 -python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml +python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml # 多卡 -python3.7 -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_to_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml +python -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_distill_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml ``` - `-c`: 指定模型配置文件,也是student配置文件。 diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml index 
a75e5857b..c000a4898 100644 --- a/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml +++ b/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml @@ -10,9 +10,29 @@ PPYOLOE: post_process: ~ +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + collate_batch: True + + log_iter: 100 snapshot_epoch: 5 -weights: output/ppyoloe_plus_crn_l_80e_coco/model_final +weights: output/ppyoloe_plus_crn_l_80e_coco_distill/model_final pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams depth_mult: 1.0 diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml index 5838110fe..ef2f38510 100644 --- a/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml +++ b/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml @@ -10,9 +10,29 @@ PPYOLOE: post_process: ~ +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + collate_batch: True + + log_iter: 100 snapshot_epoch: 5 -weights: output/ppyoloe_plus_crn_m_80e_coco/model_final +weights: 
output/ppyoloe_plus_crn_m_80e_coco_distill/model_final pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_m_obj365_pretrained.pdparams depth_mult: 0.67 diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml index 45d281378..95ac5d0ca 100644 --- a/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml +++ b/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml @@ -10,9 +10,29 @@ PPYOLOE: post_process: ~ +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + collate_batch: True + + log_iter: 100 snapshot_epoch: 5 -weights: output/ppyoloe_plus_crn_s_80e_coco/model_final +weights: output/ppyoloe_plus_crn_s_80e_coco_distill/model_final pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams depth_mult: 0.33 diff --git a/configs/slim/distill/README.md b/configs/slim/distill/README.md index d3f16135a..6ffdf50da 100644 --- a/configs/slim/distill/README.md +++ b/configs/slim/distill/README.md @@ -1,45 +1,67 @@ # Distillation(蒸馏) ## YOLOv3模型蒸馏 + 以YOLOv3-MobileNetV1为例,使用YOLOv3-ResNet34作为蒸馏训练的teacher网络, 对YOLOv3-MobileNetV1结构的student网络进行蒸馏。 COCO数据集作为目标检测任务的训练目标难度更大,意味着teacher网络会预测出更多的背景bbox,如果直接用teacher的预测输出作为student学习的`soft label`会有严重的类别不均衡问题。解决这个问题需要引入新的方法,详细背景请参考论文:[Object detection at 200 Frames Per Second](https://arxiv.org/abs/1805.06361)。 
-为了确定蒸馏的对象,我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objness`等Tensor,用teacher得到的结果指导student训练。具体实现可参考[代码](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/ppdet/slim/distill.py) +为了确定蒸馏的对象,我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objectness`等Tensor,用teacher得到的结果指导student训练。具体实现可参考[代码](../../../ppdet/slim/distill_loss.py) + +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| :---------------: | :---------: | :----: | :----: |:-----------: | :--------------: | :------------: | +| YOLOv3-ResNet34 | teacher | 608 | 270e | 36.2 | [config](../../yolov3/yolov3_r34_270e_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams) | +| YOLOv3-MobileNetV1 | student | 608 | 270e | 29.4 | [config](../../yolov3/yolov3_mobilenet_v1_270e_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams) | +| YOLOv3-MobileNetV1 | distill | 608 | 270e | 31.0(+1.6) | [config](../../yolov3/yolov3_mobilenet_v1_270e_coco.yml),[slim_config](./yolov3_mobilenet_v1_coco_distill.yml) | [download](https://paddledet.bj.bcebos.com/models/slim/yolov3_mobilenet_v1_coco_distill.pdparams) | ## FGD模型蒸馏 FGD全称为[Focal and Global Knowledge Distillation for Detectors](https://arxiv.org/abs/2111.11837v1),是目标检测任务的一种蒸馏方法,FGD蒸馏分为两个部分`Focal`和`Global`。`Focal`蒸馏分离图像的前景和背景,让学生模型分别关注教师模型的前景和背景部分特征的关键像素;`Global`蒸馏部分重建不同像素之间的关系并将其从教师转移到学生,以补偿`Focal`蒸馏中丢失的全局信息。试验结果表明,FGD蒸馏算法在基于anchor和anchor free的方法上能有效提升模型精度。 -在PaddleDetection中,我们实现了FGD算法,并基于retinaNet算法进行验证,实验结果如下: -| algorithm | model | AP | download| -|:-:| :-: | :-: | :-:| -|retinaNet_r101_fpn_2x | teacher | 40.6 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | -|retinaNet_r50_fpn_1x| student | 37.5 |[download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_1x_coco.pdparams) | -|retinaNet_r50_fpn_2x + FGD| student | 40.8 |[download](https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams) | 
+在PaddleDetection中,我们实现了FGD算法,并基于RetinaNet算法进行验证,实验结果如下: + +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: | +| RetinaNet-ResNet101| teacher | 1333x800 | 2x | 40.6 | [config](../../retinanet/retinanet_r101_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | +| RetinaNet-ResNet50 | student | 1333x800 | 2x | 39.1 | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco.pdparams) | +| RetinaNet-ResNet50 | FGD | 1333x800 | 2x | 40.8(+1.7) | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml),[slim_config](./retinanet_resnet101_coco_distill.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_distill_r50_2x_coco.pdparams) | ## LD模型蒸馏 LD全称为[Localization Distillation for Dense Object Detection](https://arxiv.org/abs/2102.12252),将回归框表示为概率分布,把分类任务的KD用在定位任务上,并且使用因地制宜、分而治之的策略,在不同的区域分别学习分类知识与定位知识。在PaddleDetection中,我们实现了LD算法,并基于GFL模型进行验证,实验结果如下: -| algorithm | model | AP | download| -|:-:| :-: | :-: | :-:| -| GFL_ResNet101-vd | teacher | 46.8 | [model](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams), [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | -| GFL_ResNet18-vd | student | 36.6 | [model](https://paddledet.bj.bcebos.com/models/gfl_r18vd_1x_coco.pdparams), [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/gfl/gfl_r18vd_1x_coco.yml) | -| GFL_ResNet18-vd + LD | student | 38.2 | [model](https://bj.bcebos.com/v1/paddledet/models/gfl_slim_ld_r18vd_1x_coco.pdparams), [config1](../../gfl/gfl_slim_ld_r18vd_1x_coco.yml), [config2](./gfl_ld_distill.yml) | + +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| ----------------- | ----------- | ------ | :----: | :-----------: | 
:--------------: | :------------: | +| GFL_ResNet101-vd| teacher | 1333x800 | 2x | 46.8 | [config](../../gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) | +| GFL_ResNet18-vd | student | 1333x800 | 1x | 36.6 | [config](../../gfl/gfl_r18vd_1x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r18vd_1x_coco.pdparams) | +| GFL_ResNet18-vd | LD | 1333x800 | 1x | 38.2(+1.6) | [config](../../gfl/gfl_slim_ld_r18vd_1x_coco.yml),[slim_config](./gfl_ld_distill.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_slim_ld_r18vd_1x_coco.pdparams) | + ## CWD模型蒸馏 CWD全称为[Channel-wise Knowledge Distillation for Dense Prediction*](https://arxiv.org/pdf/2011.13256.pdf),通过最小化教师网络与学生网络的通道概率图之间的 Kullback-Leibler (KL) 散度,使得在蒸馏过程更加关注每个通道的最显著的区域,进而提升文本检测与图像分割任务的精度。在PaddleDetection中,我们实现了CWD算法,并基于GFL和RetinaNet模型进行验证,实验结果如下: -| algorithm | model | AP | download| -|:-:| :-: | :-: | :-:| -|retinaNet_r101_fpn_2x | teacher | 40.6 | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | -|retinaNet_r50_fpn_1x| student | 37.5 |[download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_1x_coco.pdparams) | -|retinaNet_r50_fpn_2x + CWD| student | 40.5 |[download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams) | -|gfl_r101_fpn_2x | teacher | 46.8 | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) | -|gfl_r50_fpn_1x| student | 41.0 |[download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_1x_coco.pdparams) | -|gfl_r50_fpn_2x + CWD| student | 44.0 |[download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_2x_coco_cwd.pdparams) | -## PPYOLOE+模型蒸馏 +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: | +| RetinaNet-ResNet101| teacher | 1333x800 | 2x | 40.6 | 
[config](../../retinanet/retinanet_r101_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_coco.pdparams) | +| RetinaNet-ResNet50 | student | 1333x800 | 2x | 39.1 | [config](../../retinanet/retinanet_r50_fpn_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco.pdparams) | +| RetinaNet-ResNet50 | CWD | 1333x800 | 2x | 40.5(+1.4) | [config](../../retinanet/retinanet_r50_fpn_2x_coco_cwd.yml),[slim_config](./retinanet_resnet101_coco_distill_cwd.yml) | [download](https://paddledet.bj.bcebos.com/models/retinanet_r50_fpn_2x_coco_cwd.pdparams) | +| GFL_ResNet101-vd| teacher | 1333x800 | 2x | 46.8 | [config](../../gfl/gfl_r101vd_fpn_mstrain_2x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_2x_coco.pdparams) | +| GFL_ResNet50 | student | 1333x800 | 1x | 41.0 | [config](../../gfl/gfl_r50_fpn_1x_coco.yml) | [download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_1x_coco.pdparams) | +| GFL_ResNet50 | CWD | 1333x800 | 2x | 44.0(+3.0) | [config](../../gfl/gfl_r50_fpn_2x_coco_cwd.yml),[slim_config](./gfl_r101vd_fpn_coco_distill_cwd.yml) | [download](https://bj.bcebos.com/v1/paddledet/models/gfl_r50_fpn_2x_coco_cwd.pdparams) | + + +## PPYOLOE+ 模型蒸馏 + +PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案,结合了logits蒸馏和feature蒸馏。 +| 模型 | 方案 | 输入尺寸 | epochs | Box mAP | 配置文件 | 下载链接 | +| ----------------- | ----------- | ------ | :----: | :-----------: | :--------------: | :------------: | +| PP-YOLOE+_x | teacher | 640 | 80e | 54.7 | [config](../../ppyoloe/ppyoloe_plus_crn_x_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_x_80e_coco.pdparams) | +| PP-YOLOE+_l | student | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | +| PP-YOLOE+_l | distill | 640 | 80e | 53.9(+1.0) | 
[config](../../ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_x_distill_l.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco_distill.pdparams) | +| PP-YOLOE+_l | teacher | 640 | 80e | 52.9 | [config](../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_l_80e_coco.pdparams) | +| PP-YOLOE+_m | student | 640 | 80e | 49.8 | [config](../../ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco.pdparams) | +| PP-YOLOE+_m | distill | 640 | 80e | 50.7(+0.9) | [config](../../ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml),[slim_config](./ppyoloe_plus_distill_l_distill_m.yml) | [model](https://bj.bcebos.com/v1/paddledet/models/ppyoloe_plus_crn_m_80e_coco_distill.pdparams) | ## 快速开始 @@ -47,9 +69,9 @@ CWD全称为[Channel-wise Knowledge Distillation for Dense Prediction*](https:// ### 训练 ```shell # 单卡 -python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml +python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml # 多卡 -python3.7 -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_to_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml +python -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_distill_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml ``` - `-c`: 指定模型配置文件,也是student配置文件。 diff --git a/configs/slim/distill/ppyoloe_plus_distill_l_to_m.yml 
b/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml similarity index 62% rename from configs/slim/distill/ppyoloe_plus_distill_l_to_m.yml rename to configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml index 46e0346d4..dbef4902e 100644 --- a/configs/slim/distill/ppyoloe_plus_distill_l_to_m.yml +++ b/configs/slim/distill/ppyoloe_plus_distill_l_distill_m.yml @@ -4,18 +4,35 @@ _BASE_: [ ] depth_mult: 1.0 width_mult: 1.0 - +for_distill: True architecture: PPYOLOE PPYOLOE: backbone: CSPResNet neck: CustomCSPPAN yolo_head: PPYOLOEHead post_process: ~ - for_distill: True pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams find_unused_parameters: True -for_distill: True + +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + collate_batch: True slim: Distill diff --git a/configs/slim/distill/ppyoloe_plus_distill_m_to_s.yml b/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml similarity index 59% rename from configs/slim/distill/ppyoloe_plus_distill_m_to_s.yml rename to configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml index 46747b194..869e1bc2d 100644 --- a/configs/slim/distill/ppyoloe_plus_distill_m_to_s.yml +++ b/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml @@ -1,21 +1,38 @@ # teacher and slim config _BASE_: [ - '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml', + '../../ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml', ] depth_mult: 0.67 width_mult: 0.75 - +for_distill: True architecture: PPYOLOE PPYOLOE: backbone: CSPResNet neck: CustomCSPPAN yolo_head: PPYOLOEHead 
post_process: ~ - for_distill: True pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco.pdparams find_unused_parameters: True -for_distill: True + +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + collate_batch: True slim: Distill diff --git a/configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml b/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml similarity index 62% rename from configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml rename to configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml index 01512aa98..3f78deaef 100644 --- a/configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml +++ b/configs/slim/distill/ppyoloe_plus_distill_x_distill_l.yml @@ -4,18 +4,35 @@ _BASE_: [ ] depth_mult: 1.33 width_mult: 1.25 - +for_distill: True architecture: PPYOLOE PPYOLOE: backbone: CSPResNet neck: CustomCSPPAN yolo_head: PPYOLOEHead post_process: ~ - for_distill: True pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_coco.pdparams find_unused_parameters: True -for_distill: True + +worker_num: 4 +TrainReader: + sample_transforms: + - Decode: {} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [640], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} + - Permute: {} + - PadGT: {} + batch_size: 8 + shuffle: True + drop_last: True + use_shared_memory: True + 
collate_batch: True slim: Distill diff --git a/ppdet/slim/distill_model.py b/ppdet/slim/distill_model.py index 6ca085c43..c06f92f08 100644 --- a/ppdet/slim/distill_model.py +++ b/ppdet/slim/distill_model.py @@ -330,6 +330,8 @@ class PPYOLOEDistillModel(DistillModel): def forward(self, inputs, alpha=0.125): if self.training: + with paddle.no_grad(): + teacher_loss = self.teacher_model(inputs) if hasattr(self.teacher_model.yolo_head, "assigned_labels"): self.student_model.yolo_head.assigned_labels, self.student_model.yolo_head.assigned_bboxes, self.student_model.yolo_head.assigned_scores, self.student_model.yolo_head.mask_positive = \ self.teacher_model.yolo_head.assigned_labels, self.teacher_model.yolo_head.assigned_bboxes, self.teacher_model.yolo_head.assigned_scores, self.teacher_model.yolo_head.mask_positive @@ -338,8 +340,6 @@ class PPYOLOEDistillModel(DistillModel): delattr(self.teacher_model.yolo_head, "assigned_scores") delattr(self.teacher_model.yolo_head, "mask_positive") student_loss = self.student_model(inputs) - with paddle.no_grad(): - teacher_loss = self.teacher_model(inputs) logits_loss, feat_loss = self.distill_loss(self.teacher_model, self.student_model) -- GitLab