# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function import paddle import paddle.nn as nn import paddle.nn.functional as F from ppdet.core.workspace import register from ppdet.modeling.losses.iou_loss import GIoULoss from ppdet.modeling.transformers import bbox_cxcywh_to_xyxy, sigmoid_focal_loss __all__ = ['OWLViTLoss'] @register class OWLViTLoss(nn.Layer): __shared__ = ['num_classes'] __inject__ = ['HungarianMatcher'] def __init__(self, num_classes, matcher='HungarianMatcher', normalization='per_example', loss_coeff=None, use_focal_loss=None, alpha=None, gamma=None): super().__init__() self.giou_loss = GIoULoss() self.num_classes = num_classes self.matcher = matcher self.loss_coeff = matcher.matcher_coeff if loss_coeff is None else loss_coeff self.use_focal_loss = matcher.use_focal_loss if use_focal_loss is None else use_focal_loss self.alpha = matcher.alpha if alpha is None else alpha self.gamma = matcher.gamma if gamma is None else gamma assert normalization in [ 'per_example', 'global' ], f'{normalization} should be in [pre_example, global]' self.normalization = normalization def _get_loss_class(self, logits, gt_class, match_indices): # logits: [b, query, num_classes], gt_class: list[[n, 1]] target_label = paddle.full( logits.shape[:2], self.num_classes, dtype='int64') bs, num_query_objects = target_label.shape if sum(len(a) for a in gt_class) > 0: index, updates = self._get_index_updates(num_query_objects, gt_class, match_indices) target_label = paddle.scatter( target_label.reshape([-1, 1]), index, updates.astype('int64')) target_label = target_label.reshape([bs, num_query_objects]) if self.use_focal_loss: target_label = F.one_hot(target_label, self.num_classes + 1)[..., :-1] if self.use_focal_loss: loss_cls = F.sigmoid_focal_loss( logits, target_label, alpha=self.alpha, gamma=self.gamma, reduction='none') else: loss_cls = F.cross_entropy(logits, target_label, reduction='none') return loss_cls.sum(axis=[1, 2]) def _get_loss_bbox(self, boxes, gt_bbox, match_indices): src_bbox, target_bbox = self._get_src_target_assign(boxes, gt_bbox, match_indices) src_box = bbox_cxcywh_to_xyxy(src_bbox) target_bbox = bbox_cxcywh_to_xyxy(target_bbox) loss_bbox = F.l1_loss(src_bbox, target_bbox, reduction='none') loss_giou = self.giou_loss(src_bbox, target_bbox) return loss_bbox.sum(axis=1), loss_giou.sum(axis=1) def _get_src_target_assign(self, src, target, match_indices): src_assign = paddle.concat([ paddle.gather( t, I, axis=0) if len(I) > 0 else paddle.zeros([0, t.shape[-1]]) for t, (I, _) in zip(src, match_indices) ]) target_assign = paddle.concat([ paddle.gather( t, J, axis=0) if len(J) > 0 else paddle.zeros([0, t.shape[-1]]) for t, (_, J) in zip(target, match_indices) ]) return src_assign, target_assign def forward(self, head_outs, gt_meta): logits, boxes = head_outs gt_class, gt_bbox = gt_meta['gt_class'], gt_meta['gt_bbox'] match_indices = self.matcher(boxes.detach(), logits.detach(), gt_bbox, gt_class) loss_cls = self._get_loss_class(logits, gt_class, match_indices) loss_bbox, loss_giou = self._get_loss_bbox(boxes, gt_bbox, match_indices) num_gts = paddle.to_tensor([len(a) for a in gt_class]) if self.normalization == 'per_example': num_gts = paddle.clip(num_gts, min=1) loss_cls = (loss_cls / num_gts).mean() loss_bbox = (loss_bbox / num_gts).mean() loss_giou = (loss_giou / num_gts).mean() # normalize_fn = lambda x : (x / num_gts).mean() else: num_gts = paddle.distributed.all_reduce(num_gts) num_gts = paddle.clip( num_gts / paddle.distributed.get_world_size(), min=1) loss_cls = loss_cls.sum() / num_gts loss_bbox = loss_bbox.sum() / num_gts loss_giou = loss_giou.sum() / num_gts # normalize_fn = lambda x: x.sum() / num_gts # loss_cls, loss_box, loss_giou = [normalize_fn(l) for l in [loss_cls, loss_box, loss_giou]] loss = self.loss_coeff['class'] * loss_cls + \ self.loss_coeff['bbox'] * loss_bbox + \ self.loss_coeff['giou'] * loss_giou return { 'loss': loss, 'loss_cls': loss_cls, 'loss_bbox': loss_bbox, 'loss_giou': loss_giou }