# ppyoloe_plus_distill_x_distill_l.yml

# Teacher model and slim (distillation) configuration.
# Base config file(s) this configuration builds on.
_BASE_:
  - '../../ppyoloe/ppyoloe_plus_crn_x_80e_coco.yml'
# Depth / width scaling multipliers for the model defined in this file.
depth_mult: 1.33
# Same value as teacher_width_mult under DistillPPYOLOELoss.
width_mult: 1.25
# Distillation switch; NOTE(review): presumably read by the model code - confirm.
for_distill: true
# Top-level architecture and the components it is assembled from.
architecture: PPYOLOE
PPYOLOE:
  backbone: CSPResNet
  neck: CustomCSPPAN
  yolo_head: PPYOLOEHead
  # Explicitly null: no post_process component is configured here.
  post_process: null

# URL of the pretrained checkpoint (.pdparams) for this model.
pretrain_weights: 'https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_coco.pdparams'
find_unused_parameters: true

# Training data reader settings.
worker_num: 4
TrainReader:
  # Transforms listed per sample; `{}` means the op takes no explicit arguments.
  sample_transforms:
    - Decode: {}
    - RandomDistort: {}
    - RandomExpand:
        fill_value: [123.675, 116.28, 103.53]
    - RandomCrop: {}
    - RandomFlip: {}
  # Transforms listed per batch.
  batch_transforms:
    - BatchRandomResize:
        target_size: [640]
        random_size: true
        random_interp: true
        keep_ratio: false
    - NormalizeImage:
        mean: [0.0, 0.0, 0.0]
        std: [1.0, 1.0, 1.0]
        # The string 'none', not a YAML null.
        norm_type: 'none'
    - Permute: {}
    - PadGT: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  use_shared_memory: true
  collate_batch: true


# Slim (distillation) selection: strategy, method and the loss entry to use.
# The distill_loss value matches the DistillPPYOLOELoss section of this file.
slim: 'Distill'
slim_method: 'PPYOLOEDistill'
distill_loss: 'DistillPPYOLOELoss'

# Distillation loss settings: X (teacher) -> L (student).
DistillPPYOLOELoss:
  # Relative weights of the logits term and the feature term.
  loss_weight:
    logits: 4.0
    feat: 1.0
  logits_distill: true
  # Per-component weights inside the logits term.
  logits_loss_weight:
    class: 1.0
    iou: 2.5
    dfl: 0.5
  feat_distill: true
  # Available choices: 'cwd', 'fgd', 'pkd', 'mgd', 'mimic'.
  feat_distiller: 'fgd'
  feat_distill_place: 'neck_feats'
  teacher_width_mult: 1.25  # X
  student_width_mult: 1.0  # L
  # Base values; the actual channel count is each value multiplied by width_mult.
  feat_out_channels: [768, 384, 192]