2021-06-02 16:31:57 +08:00
|
|
|
#copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
|
|
|
|
#
|
|
|
|
#Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
#you may not use this file except in compliance with the License.
|
|
|
|
#You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
#Unless required by applicable law or agreed to in writing, software
|
|
|
|
#distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
#See the License for the specific language governing permissions and
|
|
|
|
#limitations under the License.
|
|
|
|
|
|
|
|
import paddle
|
|
|
|
import paddle.nn as nn
|
|
|
|
import paddle.nn.functional as F
|
|
|
|
|
|
|
|
from paddle.nn import L1Loss
|
|
|
|
from paddle.nn import MSELoss as L2Loss
|
|
|
|
from paddle.nn import SmoothL1Loss
|
|
|
|
|
|
|
|
|
|
|
|
class CELoss(nn.Layer):
    """Cross-entropy loss with optional label smoothing.

    Args:
        epsilon: label-smoothing factor. ``None`` disables smoothing. A value
            that is ``<= 0`` or ``>= 1`` is discarded and treated as ``None``.
    """

    def __init__(self, epsilon=None):
        super().__init__()
        # Out-of-range smoothing factors silently disable smoothing.
        if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
            epsilon = None
        self.epsilon = epsilon

    def _labelsmoothing(self, target, class_num):
        """Build smoothed soft targets with ``class_num`` columns.

        Args:
            target: labels. When its last dimension differs from
                ``class_num`` it is one-hot encoded first; otherwise it is
                used as given.
            class_num: number of classes (size of the last logits axis).

        Returns:
            The result of ``F.label_smooth`` reshaped to ``[-1, class_num]``.
        """
        if target.shape[-1] != class_num:
            one_hot_target = F.one_hot(target, class_num)
        else:
            one_hot_target = target
        soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
        soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
        return soft_target

    def forward(self, x, label):
        """Compute the cross-entropy between logits ``x`` and ``label``.

        Args:
            x: logits; the last axis is taken as the class axis.
            label: targets. With smoothing disabled, a label whose last
                dimension equals that of ``x`` is passed through softmax and
                used as a soft label; any other label is used as a hard label.

        Returns:
            With smoothing enabled: ``sum(-log_softmax(x) * soft_label)`` over
            the last axis only (no reduction over the remaining axes).
            Otherwise: the value of ``F.cross_entropy`` with its default
            reduction.
        """
        if self.epsilon is not None:
            class_num = x.shape[-1]
            label = self._labelsmoothing(label, class_num)
            x = -F.log_softmax(x, axis=-1)
            loss = paddle.sum(x * label, axis=-1)
        else:
            # NOTE(review): the soft/hard decision is made purely on shape, so
            # a label whose last dimension happens to equal the class count is
            # always treated as a soft label.
            if label.shape[-1] == x.shape[-1]:
                label = F.softmax(label, axis=-1)
                soft_label = True
            else:
                soft_label = False
            loss = F.cross_entropy(x, label=label, soft_label=soft_label)
        return loss
|
2021-06-02 16:31:57 +08:00
|
|
|
|
|
|
|
|
2021-07-07 15:54:02 +08:00
|
|
|
class KLJSLoss(object):
    """Element-wise KL-style divergence between two probability tensors.

    Args:
        mode: one of ``'kl'``, ``'js'``, ``'KL'``, ``'JS'``. The value is
            compared case-insensitively when the loss is computed.
    """

    def __init__(self, mode='kl'):
        assert mode in ['kl', 'js', 'KL', 'JS'
                        ], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
        self.mode = mode

    def __call__(self, p1, p2, reduction="mean", eps=1e-5):
        """Compute the divergence of ``p1`` from ``p2``.

        Args:
            p1: first probability tensor.
            p2: second probability tensor, same shape as ``p1``.
            reduction: ``"mean"`` averages over axes 1 and 2; ``"none"`` or
                ``None`` returns the element-wise loss; any other value sums
                over axes 1 and 2.
            eps: small constant added to the numerator, the denominator and
                the ratio before taking the logarithm. Defaults to ``1e-5``,
                the value that was previously hard-coded.

        Returns:
            The (optionally reduced) loss tensor.
        """
        # Forward term: p2 * log(p2 / p1), stabilised with eps.
        loss = paddle.multiply(p2, paddle.log((p2 + eps) / (p1 + eps) + eps))

        if self.mode.lower() == "js":
            # NOTE(review): this adds the reverse term and halves the sum,
            # i.e. 0.5 * (KL(p2||p1) + KL(p1||p2)). That is a symmetrised KL,
            # not the mixture-based Jensen-Shannon divergence.
            loss += paddle.multiply(
                p1, paddle.log((p1 + eps) / (p2 + eps) + eps))
            loss *= 0.5

        if reduction == "none" or reduction is None:
            return loss
        if reduction == "mean":
            # Reducing over axes 1 and 2 requires at least 3-D inputs.
            return paddle.mean(loss, axis=[1, 2])
        # Every other reduction value falls back to a sum over axes 1 and 2.
        return paddle.sum(loss, axis=[1, 2])
|
2021-07-07 15:54:02 +08:00
|
|
|
|
|
|
|
|
2021-06-02 16:31:57 +08:00
|
|
|
class DMLLoss(nn.Layer):
    """
    DMLLoss

    Symmetric loss between two outputs.

    Args:
        act: optional activation applied to both inputs first; ``"softmax"``
            (over the last axis), ``"sigmoid"`` or ``None``.
        use_log: when True, use the average of two ``F.kl_div`` terms computed
            on log-probabilities; otherwise delegate to ``KLJSLoss`` in
            ``"js"`` mode.
    """

    def __init__(self, act=None, use_log=False):
        super().__init__()
        if act is None:
            self.act = None
        else:
            assert act in ["softmax", "sigmoid"]
            if act == "softmax":
                self.act = nn.Softmax(axis=-1)
            elif act == "sigmoid":
                self.act = nn.Sigmoid()

        self.use_log = use_log

        self.jskl_loss = KLJSLoss(mode="js")

    def forward(self, out1, out2):
        """Return the symmetric loss between ``out1`` and ``out2``."""
        activation = self.act
        if activation is not None:
            out1 = activation(out1)
            out2 = activation(out2)

        if not self.use_log:
            # for detection distillation log is not needed
            return self.jskl_loss(out1, out2)

        # for recognition distillation, log is needed for feature map
        first_term = F.kl_div(paddle.log(out1), out2, reduction='batchmean')
        second_term = F.kl_div(paddle.log(out2), out1, reduction='batchmean')
        return (first_term + second_term) / 2.0
|
2021-06-02 16:31:57 +08:00
|
|
|
|
|
|
|
|
|
|
|
class DistanceLoss(nn.Layer):
    """
    DistanceLoss:
        mode: loss mode, one of "l1", "l2" or "smooth_l1"
        kargs: extra keyword arguments forwarded to the selected paddle loss
    """

    def __init__(self, mode="l2", **kargs):
        super().__init__()
        assert mode in ["l1", "l2", "smooth_l1"]
        # Map each mode name to the paddle loss layer that implements it.
        loss_classes = {
            "l1": nn.L1Loss,
            "l2": nn.MSELoss,
            "smooth_l1": nn.SmoothL1Loss,
        }
        loss_cls = loss_classes.get(mode)
        if loss_cls is not None:
            self.loss_func = loss_cls(**kargs)

    def forward(self, x, y):
        """Apply the configured distance loss to ``x`` and ``y``."""
        return self.loss_func(x, y)
|