# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import L1Loss
from paddle.nn import MSELoss as L2Loss
from paddle.nn import SmoothL1Loss

__all__ = [
    "CELoss", "DMLLoss", "DistanceLoss", "RKdAngle", "RkdDistance", "KLLoss"
]


class CELoss(nn.Layer):
    """
    CELoss: cross entropy loss
    Args:
        epsilon(float | None): label smoothing epsilon. If it is None or not
            in the range (0, 1), label smoothing is not used.
        label_act(string | None): activation function applied to the label
            when the label is itself a logits tensor rather than a
            ground-truth label.
        axis(int): axis along which the cross entropy loss is computed.
    """

    def __init__(self, epsilon=None, label_act="softmax", axis=-1):
        super().__init__()
        # Disable label smoothing for out-of-range epsilon values.
        if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
            epsilon = None
        assert label_act in ["softmax", None]
        self.epsilon = epsilon
        self.label_act = label_act
        self.axis = axis

    def _labelsmoothing(self, target, class_num):
        if target.shape[-1] != class_num:
            one_hot_target = F.one_hot(target, class_num)
        else:
            one_hot_target = target
        soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
        soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
        return soft_target

    def forward(self, x, label):
        assert len(x.shape) == len(label.shape), \
            "x and label should have the same rank, but got shape {} for x and {} for label".format(
                x.shape, label.shape)
        if self.epsilon is not None:
            class_num = x.shape[-1]
            label = self._labelsmoothing(label, class_num)
            x = -F.log_softmax(x, axis=self.axis)
            loss = paddle.sum(x * label, axis=self.axis)
        else:
            # If the label has as many entries along `axis` as there are
            # classes, treat it as a soft label.
            if label.shape[self.axis] == x.shape[self.axis]:
                if self.label_act == "softmax":
                    label = F.softmax(label, axis=self.axis)
                soft_label = True
            else:
                soft_label = False
            loss = F.cross_entropy(
                x, label=label, soft_label=soft_label, axis=self.axis)
        loss = loss.mean()
        return loss


class DMLLoss(nn.Layer):
    """
    DMLLoss
    Args:
        act(string | None): activation function used to activate the input
            tensors.
        axis(int): axis used to build the activation function.
    """

    def __init__(self, act=None, axis=-1):
        super().__init__()
        if act is not None:
            assert act in ["softmax", "sigmoid"]
        if act == "softmax":
            self.act = nn.Softmax(axis=axis)
        elif act == "sigmoid":
            self.act = nn.Sigmoid()
        else:
            self.act = None

    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
            out2 = self.act(out2)
        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        # Symmetric KL divergence between the two distributions.
        loss = (F.kl_div(
            log_out1, out2, reduction='batchmean') + F.kl_div(
                log_out2, out1, reduction='batchmean')) / 2.0
        return loss
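
# A minimal usage sketch (an addition, not part of the original training
# code): it shows how CELoss and DMLLoss can be applied to a pair of logits
# tensors for logits-level distillation. The batch size, class count and
# random inputs are illustrative assumptions.
def _ce_dml_example():
    student_logits = paddle.randn([8, 10])
    teacher_logits = paddle.randn([8, 10])
    # CELoss with a soft label: because label_act defaults to "softmax" and
    # the label has the same class count as x, the teacher logits are
    # softmax-activated inside the loss and used as a soft target.
    ce = CELoss()(student_logits, teacher_logits)
    # DMLLoss: symmetric KL divergence between the two softmax outputs.
    dml = DMLLoss(act="softmax")(student_logits, teacher_logits)
    return ce, dml
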
""" def __init__(self, act='softmax', axis=-1, reduction='mean'): super().__init__() assert act in ['softmax', 'sigmoid', None] self.reduction = reduction if act == 'softmax': self.act = nn.Softmax(axis=axis) elif act == 'sigmoid': self.act = nn.Sigmoid() else: self.act = None def forward(self, input, label): """ Args: input(Tensor): The input tensor. label(Tensor): The label tensor. The shape of label is the same as input. Returns: Tensor: The kl loss. """ assert input.shape == label.shape, \ "The shape of label should be the same as input." if self.act is not None: input = self.act(input) label = self.act(label) log_input = paddle.log(input) loss = F.kl_div(log_input, label, reduction=self.reduction) return loss class DistanceLoss(nn.Layer): """ DistanceLoss Args: mode: loss mode kargs(dict): used to build corresponding loss function, for more details, please refer to: L1loss: https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/nn/L1Loss_cn.html#l1loss L2Loss: https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/nn/MSELoss_cn.html#mseloss SmoothL1Loss: https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/nn/SmoothL1Loss_cn.html#smoothl1loss """ def __init__(self, mode="l2", **kargs): super().__init__() assert mode in ["l1", "l2", "smooth_l1"] if mode == "l1": self.loss_func = nn.L1Loss(**kargs) elif mode == "l2": self.loss_func = nn.MSELoss(**kargs) elif mode == "smooth_l1": self.loss_func = nn.SmoothL1Loss(**kargs) def forward(self, x, y): return self.loss_func(x, y) def pdist(e, squared=False, eps=1e-12): e_square = e.pow(2).sum(axis=1) prod = paddle.mm(e, e.t()) res = (e_square.unsqueeze(1) + e_square.unsqueeze(0) - 2 * prod).clip( min=eps) if not squared: res = res.sqrt() return res class RKdAngle(nn.Layer): """ RKdAngle loss, see https://arxiv.org/abs/1904.05068 """ def __init__(self): super().__init__() def forward(self, student, teacher): # reshape for feature map distillation bs = student.shape[0] student = student.reshape([bs, -1]) teacher = teacher.reshape([bs, -1]) td = (teacher.unsqueeze(0) - teacher.unsqueeze(1)) norm_td = F.normalize(td, p=2, axis=2) t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape( [-1, 1]) sd = (student.unsqueeze(0) - student.unsqueeze(1)) norm_sd = F.normalize(sd, p=2, axis=2) s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape( [-1, 1]) loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean') return loss class RkdDistance(nn.Layer): """ RkdDistance loss, see https://arxiv.org/abs/1904.05068 Args: eps(float): epsilon for the pdist function """ def __init__(self, eps=1e-12): super().__init__() self.eps = eps def forward(self, student, teacher): bs = student.shape[0] student = student.reshape([bs, -1]) teacher = teacher.reshape([bs, -1]) t_d = pdist(teacher, squared=False, eps=self.eps) mean_td = t_d.mean() t_d = t_d / (mean_td + self.eps) d = pdist(student, squared=False, eps=self.eps) mean_d = d.mean() d = d / (mean_d + self.eps) loss = F.smooth_l1_loss(d, t_d, reduction="mean") return loss