# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr

from ppdet.core.workspace import register, create, load_config
from ppdet.utils.checkpoint import load_pretrain_weight
from ppdet.utils.logger import setup_logger

logger = setup_logger(__name__)

__all__ = [
    'DistillModel',
    'FGDDistillModel',
    'CWDDistillModel',
    'LDDistillModel',
    'PPYOLOEDistillModel',
]


@register
class DistillModel(nn.Layer):
    """
    Build common distill model.

    Holds a trainable student model and a frozen teacher model, and adds a
    distillation loss to the student's training loss.

    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config. It is passed to
            ``load_config``, so it must be whatever that function accepts
            (presumably a config file path; verify against callers).
    """

    def __init__(self, cfg, slim_cfg):
        super(DistillModel, self).__init__()
        self.arch = cfg.architecture

        # Student: built from the config it was given.
        self.stu_cfg = cfg
        self.student_model = create(self.stu_cfg.architecture)
        if 'pretrain_weights' in self.stu_cfg and self.stu_cfg.pretrain_weights:
            stu_pretrain = self.stu_cfg.pretrain_weights
        else:
            stu_pretrain = None

        # Teacher: built from the slim config, which also carries the
        # distillation settings.
        slim_cfg = load_config(slim_cfg)
        self.tea_cfg = slim_cfg
        self.teacher_model = create(self.tea_cfg.architecture)
        if 'pretrain_weights' in self.tea_cfg and self.tea_cfg.pretrain_weights:
            tea_pretrain = self.tea_cfg.pretrain_weights
        else:
            tea_pretrain = None
        self.distill_cfg = slim_cfg

        # load pretrain weights
        # NOTE(review): is_inherit is always False at this point, so the
        # "inherit teacher weights" branch below is never taken here.
        self.is_inherit = False
        if stu_pretrain:
            if self.is_inherit and tea_pretrain:
                load_pretrain_weight(self.student_model, tea_pretrain)
                logger.debug(
                    "Inheriting! loading teacher weights to student model!")
            load_pretrain_weight(self.student_model, stu_pretrain)
            logger.info("Student model has loaded pretrain weights!")
        if tea_pretrain:
            load_pretrain_weight(self.teacher_model, tea_pretrain)
            logger.info("Teacher model has loaded pretrain weights!")

        # The teacher is kept in eval mode and all of its parameters are
        # marked non-trainable.
        self.teacher_model.eval()
        for param in self.teacher_model.parameters():
            param.trainable = False

        self.distill_loss = self.build_loss(self.distill_cfg)

    def build_loss(self, distill_cfg):
        """Create the distillation loss named by ``distill_cfg.distill_loss``.

        Returns:
            The created loss object, or ``None`` when the config has no
            (non-empty) ``distill_loss`` entry.
        """
        if 'distill_loss' in distill_cfg and distill_cfg.distill_loss:
            return create(distill_cfg.distill_loss)
        else:
            return None

    def parameters(self):
        """Return only the student model's parameters."""
        return self.student_model.parameters()

    def forward(self, inputs):
        """Run the student, plus the teacher and distill loss when training.

        Returns:
            In training mode, the student's loss dict extended with
            ``'distill_loss'`` and ``'teacher_loss'``, with ``'loss'``
            increased by the distill loss. Otherwise, the student model's
            output for ``inputs``.
        """
        if self.training:
            student_loss = self.student_model(inputs)
            with paddle.no_grad():
                teacher_loss = self.teacher_model(inputs)

            # NOTE(review): build_loss may return None; in that case this
            # call raises because None is not callable.
            loss = self.distill_loss(self.teacher_model, self.student_model)
            student_loss['distill_loss'] = loss
            student_loss['teacher_loss'] = teacher_loss['loss']
            student_loss['loss'] += student_loss['distill_loss']
            return student_loss
        else:
            return self.student_model(inputs)


@register
class FGDDistillModel(DistillModel):
    """
    Build FGD distill model.

    Distills on neck features; supports the 'RetinaNet' and 'PicoDet'
    architectures only.

    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(FGDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['RetinaNet', 'PicoDet'
                             ], 'Unsupported arch: {}'.format(self.arch)
        # NOTE(review): this is set after the base constructor has already
        # finished loading weights, so it does not affect the loading logic
        # in DistillModel.__init__. Kept as-is to avoid changing which
        # weights get loaded.
        self.is_inherit = True

    def build_loss(self, distill_cfg):
        """Create one distill loss per name in ``distill_loss_name``.

        Returns:
            A dict mapping each configured loss name to its own loss object.
        """
        assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
        assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
        loss_func = dict()
        name_list = distill_cfg.distill_loss_name
        for name in name_list:
            loss_func[name] = create(distill_cfg.distill_loss)
        return loss_func

    def forward(self, inputs):
        """Compute the training loss dict, or detections in eval mode.

        Returns:
            In training mode, the student head's loss dict with one extra
            entry per distill loss, all accumulated into ``'loss'``.
            Otherwise, ``{'bbox': ..., 'bbox_num': ...}``.

        Raises:
            ValueError: If ``self.arch`` is not a supported architecture.
        """
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)

            # The idx-th configured loss is applied to the idx-th neck
            # feature pair.
            loss_dict = {}
            for idx, k in enumerate(self.distill_loss):
                loss_dict[k] = self.distill_loss[k](s_neck_feats[idx],
                                                    t_neck_feats[idx], inputs)

            if self.arch == "RetinaNet":
                loss = self.student_model.head(s_neck_feats, inputs)
            elif self.arch == "PicoDet":
                head_outs = self.student_model.head(
                    s_neck_feats, self.student_model.export_post_process)
                loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
                total_loss = paddle.add_n(list(loss_gfl.values()))
                loss = {}
                loss.update(loss_gfl)
                loss.update({'loss': total_loss})
            else:
                raise ValueError(f"Unsupported model {self.arch}")

            for k in loss_dict:
                loss['loss'] += loss_dict[k]
                loss[k] = loss_dict[k]
            return loss
        else:
            body_feats = self.student_model.backbone(inputs)
            neck_feats = self.student_model.neck(body_feats)
            if self.arch == "RetinaNet":
                # The head is run inside each branch: the PicoDet branch
                # calls it with different arguments, so a shared call before
                # the branch would be computed and then thrown away.
                head_outs = self.student_model.head(neck_feats)
                bbox, bbox_num = self.student_model.head.post_process(
                    head_outs, inputs['im_shape'], inputs['scale_factor'])
                return {'bbox': bbox, 'bbox_num': bbox_num}
            elif self.arch == "PicoDet":
                head_outs = self.student_model.head(
                    neck_feats, self.student_model.export_post_process)
                scale_factor = inputs['scale_factor']
                bboxes, bbox_num = self.student_model.head.post_process(
                    head_outs,
                    scale_factor,
                    export_nms=self.student_model.export_nms)
                return {'bbox': bboxes, 'bbox_num': bbox_num}
            else:
                raise ValueError(f"Unsupported model {self.arch}")


@register
class CWDDistillModel(DistillModel):
    """
    Build CWD distill model.

    Supports the 'GFL' and 'RetinaNet' architectures only.

    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(CWDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['GFL', 'RetinaNet'], 'Unsupported arch: {}'.format(
            self.arch)

    def build_loss(self, distill_cfg):
        """Create one distill loss per name in ``distill_loss_name``.

        Returns:
            A dict mapping each configured loss name to its own loss object.
            The base constructor stores it as ``self.distill_loss``.
        """
        assert 'distill_loss_name' in distill_cfg and distill_cfg.distill_loss_name
        assert 'distill_loss' in distill_cfg and distill_cfg.distill_loss
        loss_func = dict()
        name_list = distill_cfg.distill_loss_name
        for name in name_list:
            loss_func[name] = create(distill_cfg.distill_loss)
        return loss_func

    def get_loss_retinanet(self, stu_fea_list, tea_fea_list, inputs):
        """Return the RetinaNet head loss dict plus per-level distill losses.

        Args:
            stu_fea_list: Student neck features, indexed per level.
            tea_fea_list: Teacher neck features, indexed per level.
            inputs: The batch passed to the student head.
        """
        loss = self.student_model.head(stu_fea_list, inputs)
        distill_loss = {}
        # The loss dict built by build_loss lives in self.distill_loss.
        for idx, k in enumerate(self.distill_loss):
            distill_loss[k] = self.distill_loss[k](stu_fea_list[idx],
                                                   tea_fea_list[idx])
            loss['loss'] += distill_loss[k]
            loss[k] = distill_loss[k]
        return loss

    def get_loss_gfl(self, stu_fea_list, tea_fea_list, inputs):
        """Return the GFL head loss dict plus feature and class-score
        distill losses.

        For each configured loss, the same loss object is applied twice:
        once to the class scores of student and teacher, and once to the raw
        neck features (stored under ``neck_f_<idx>``).

        Args:
            stu_fea_list: Student neck features, indexed per level.
            tea_fea_list: Teacher neck features, indexed per level.
            inputs: The batch passed to the student head's ``get_loss``.
        """
        loss = {}
        head_outs = self.student_model.head(stu_fea_list)
        loss_gfl = self.student_model.head.get_loss(head_outs, inputs)
        loss.update(loss_gfl)
        total_loss = paddle.add_n(list(loss.values()))
        loss.update({'loss': total_loss})

        feat_loss = {}
        loss_dict = {}
        s_cls_feat, t_cls_feat = [], []
        for s_neck_f, t_neck_f in zip(stu_fea_list, tea_fea_list):
            conv_cls_feat, _ = self.student_model.head.conv_feat(s_neck_f)
            cls_score = self.student_model.head.gfl_head_cls(conv_cls_feat)
            t_conv_cls_feat, _ = self.teacher_model.head.conv_feat(t_neck_f)
            t_cls_score = self.teacher_model.head.gfl_head_cls(t_conv_cls_feat)
            s_cls_feat.append(cls_score)
            t_cls_feat.append(t_cls_score)

        # The loss dict built by build_loss lives in self.distill_loss.
        for idx, k in enumerate(self.distill_loss):
            loss_dict[k] = self.distill_loss[k](s_cls_feat[idx],
                                                t_cls_feat[idx])
            feat_loss[f"neck_f_{idx}"] = self.distill_loss[k](
                stu_fea_list[idx], tea_fea_list[idx])

        for k in feat_loss:
            loss['loss'] += feat_loss[k]
            loss[k] = feat_loss[k]

        for k in loss_dict:
            loss['loss'] += loss_dict[k]
            loss[k] = loss_dict[k]
        return loss

    def forward(self, inputs):
        """Compute the training loss dict, or detections in eval mode.

        Raises:
            ValueError: If ``self.arch`` is not a supported architecture.
        """
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)

            if self.arch == "RetinaNet":
                loss = self.get_loss_retinanet(s_neck_feats, t_neck_feats,
                                               inputs)
            elif self.arch == "GFL":
                loss = self.get_loss_gfl(s_neck_feats, t_neck_feats, inputs)
            else:
                raise ValueError(f"unsupported arch {self.arch}")
            return loss
        else:
            body_feats = self.student_model.backbone(inputs)
            neck_feats = self.student_model.neck(body_feats)
            head_outs = self.student_model.head(neck_feats)
            if self.arch == "RetinaNet":
                bbox, bbox_num = self.student_model.head.post_process(
                    head_outs, inputs['im_shape'], inputs['scale_factor'])
                return {'bbox': bbox, 'bbox_num': bbox_num}
            elif self.arch == "GFL":
                bbox_pred, bbox_num = head_outs
                output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
                return output
            else:
                raise ValueError(f"unsupported arch {self.arch}")


@register
class LDDistillModel(DistillModel):
    """
    Build LD distill model.

    Supports the 'GFL' architecture only. The teacher's head outputs are
    passed to the student head's ``get_loss`` as soft labels/targets.

    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(LDDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['GFL'], 'Unsupported arch: {}'.format(self.arch)

    def forward(self, inputs):
        """Compute the training loss dict, or the student output in eval.

        Returns:
            In training mode, the dict returned by the student head's
            ``get_loss`` with ``'loss'`` set to the sum of its values.
            Otherwise, the student model's output for ``inputs``.
        """
        if self.training:
            s_body_feats = self.student_model.backbone(inputs)
            s_neck_feats = self.student_model.neck(s_body_feats)
            s_head_outs = self.student_model.head(s_neck_feats)
            with paddle.no_grad():
                t_body_feats = self.teacher_model.backbone(inputs)
                t_neck_feats = self.teacher_model.neck(t_body_feats)
                t_head_outs = self.teacher_model.head(t_neck_feats)

            soft_label_list = t_head_outs[0]
            soft_targets_list = t_head_outs[1]
            student_loss = self.student_model.head.get_loss(
                s_head_outs, inputs, soft_label_list, soft_targets_list)
            total_loss = paddle.add_n(list(student_loss.values()))
            student_loss['loss'] = total_loss
            return student_loss
        else:
            return self.student_model(inputs)


@register
class PPYOLOEDistillModel(DistillModel):
    """
    Build PPYOLOE distill model, only used in PPYOLOE

    Args:
        cfg: The student config.
        slim_cfg: The teacher and distill config.
    """

    def __init__(self, cfg, slim_cfg):
        super(PPYOLOEDistillModel, self).__init__(cfg=cfg, slim_cfg=slim_cfg)
        assert self.arch in ['PPYOLOE'], 'Unsupported arch: {}'.format(
            self.arch)

    def forward(self, inputs, alpha=0.125):
        """Compute the training loss dict, or the student output in eval.

        Args:
            inputs: The batch passed to both teacher and student.
            alpha: Scale applied to the sum of detection, logits and feature
                losses to form the final ``'loss'``.

        Returns:
            In training mode, the student's loss dict with ``'loss'``
            replaced by the scaled total and ``'det_loss'``,
            ``'logits_loss'`` and ``'feat_loss'`` added. Otherwise, the
            student model's output for ``inputs``.
        """
        if self.training:
            # The teacher's return value is not used; the forward pass is
            # run for the state it leaves on the teacher model, which is
            # read below.
            with paddle.no_grad():
                self.teacher_model(inputs)

            teacher_head = self.teacher_model.yolo_head
            if hasattr(teacher_head, "assigned_labels"):
                # Move the teacher head's assignment results onto the
                # student head, removing them from the teacher.
                student_head = self.student_model.yolo_head
                for attr in ("assigned_labels", "assigned_bboxes",
                             "assigned_scores", "mask_positive"):
                    setattr(student_head, attr, getattr(teacher_head, attr))
                    delattr(teacher_head, attr)

            student_loss = self.student_model(inputs)

            logits_loss, feat_loss = self.distill_loss(self.teacher_model,
                                                       self.student_model)
            det_total_loss = student_loss['loss']
            total_loss = alpha * (det_total_loss + logits_loss + feat_loss)
            student_loss['loss'] = total_loss
            student_loss['det_loss'] = det_total_loss
            student_loss['logits_loss'] = logits_loss
            student_loss['feat_loss'] = feat_loss
            return student_loss
        else:
            return self.student_model(inputs)