diff --git a/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml b/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml index 869e1bc2de94dfd84f2e21302c74585120e70f53..8ee944e9b91394b7bcfbf89a9610c302d803c9bb 100644 --- a/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml +++ b/configs/slim/distill/ppyoloe_plus_distill_m_distill_s.yml @@ -43,6 +43,8 @@ DistillPPYOLOELoss: # M -> S loss_weight: {'logits': 4.0, 'feat': 1.0} logits_distill: True logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5} + logits_ld_distill: True + logits_ld_params: {'weight': 20000, 'T': 10} feat_distill: True feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic'] feat_distill_place: 'neck_feats' diff --git a/ppdet/modeling/heads/ppyoloe_head.py b/ppdet/modeling/heads/ppyoloe_head.py index 1eb735194c52435a87fcca8516f80b5dc2cca370..201aa48120562324fabdeeec341785a76f9b00c4 100644 --- a/ppdet/modeling/heads/ppyoloe_head.py +++ b/ppdet/modeling/heads/ppyoloe_head.py @@ -323,6 +323,7 @@ class PPYOLOEHead(nn.Layer): assigned_bboxes, assigned_scores, assigned_scores_sum): # select positive samples mask mask_positive = (assigned_labels != self.num_classes) + self.distill_pairs['mask_positive_select'] = mask_positive num_pos = mask_positive.sum() # pos/neg loss if num_pos > 0: diff --git a/ppdet/slim/distill_loss.py b/ppdet/slim/distill_loss.py index a0539277fbec1c8f4cdbdd2c3a672aa719776e2c..6e94fd8410e351fcaa2719fc1b6e78868905c888 100644 --- a/ppdet/slim/distill_loss.py +++ b/ppdet/slim/distill_loss.py @@ -212,6 +212,9 @@ class DistillPPYOLOELoss(nn.Layer): logits_loss_weight={'class': 1.0, 'iou': 2.5, 'dfl': 0.5}, + logits_ld_distill=False, + logits_ld_params={'weight': 20000, + 'T': 10}, feat_distill=True, feat_distiller='fgd', feat_distill_place='neck_feats', @@ -222,6 +225,7 @@ class DistillPPYOLOELoss(nn.Layer): self.loss_weight_logits = loss_weight['logits'] self.loss_weight_feat = loss_weight['feat'] self.logits_distill = logits_distill + self.logits_ld_distill = logits_ld_distill self.feat_distill = feat_distill if logits_distill and self.loss_weight_logits > 0: @@ -230,6 +234,10 @@ class DistillPPYOLOELoss(nn.Layer): self.qfl_loss_weight = logits_loss_weight['class'] self.loss_bbox = GIoULoss() + if logits_ld_distill: + self.loss_kd = KnowledgeDistillationKLDivLoss( + loss_weight=logits_ld_params['weight'], T=logits_ld_params['T']) + if feat_distill and self.loss_weight_feat > 0: assert feat_distiller in ['cwd', 'fgd', 'pkd', 'mgd', 'mimic'] assert feat_distill_place in ['backbone_feats', 'neck_feats'] @@ -334,6 +342,20 @@ class DistillPPYOLOELoss(nn.Layer): loss_dfl = loss_dfl.mean(-1) return loss_dfl / 4.0 # 4 direction + def main_kd(self, mask_positive, pred_scores, soft_cls, num_classes): + num_pos = mask_positive.sum() + if num_pos > 0: + cls_mask = mask_positive.unsqueeze(-1).tile([1, 1, num_classes]) + pred_scores_pos = paddle.masked_select( + pred_scores, cls_mask).reshape([-1, num_classes]) + soft_cls_pos = paddle.masked_select( + soft_cls, cls_mask).reshape([-1, num_classes]) + loss_kd = self.loss_kd( + pred_scores_pos, soft_cls_pos, avg_factor=num_pos) + else: + loss_kd = paddle.zeros([1]) + return loss_kd + def forward(self, teacher_model, student_model): teacher_distill_pairs = teacher_model.yolo_head.distill_pairs student_distill_pairs = student_model.yolo_head.distill_pairs @@ -373,8 +395,15 @@ class DistillPPYOLOELoss(nn.Layer): distill_cls_loss = paddle.add_n(distill_cls_loss) distill_bbox_loss = paddle.add_n(distill_bbox_loss) distill_dfl_loss = paddle.add_n(distill_dfl_loss) - logits_loss = distill_bbox_loss * self.bbox_loss_weight + distill_cls_loss * self.qfl_loss_weight + distill_dfl_loss * self.dfl_loss_weight + + if self.logits_ld_distill: + loss_kd = self.main_kd( + student_distill_pairs['mask_positive_select'], + student_distill_pairs['pred_cls_scores'], + teacher_distill_pairs['pred_cls_scores'], + student_model.yolo_head.num_classes, ) + logits_loss += loss_kd else: logits_loss = paddle.zeros([1])