Commit 435144b5 authored by: D dongshuilong

fix bugs in imgnet and rec training

Parent 452f5321
@@ -50,6 +50,8 @@ class ArcMargin(nn.Layer):
         weight = paddle.divide(weight, weight_norm)
         cos = paddle.matmul(input, weight)
+        if not self.training:
+            return cos
         sin = paddle.sqrt(1.0 - paddle.square(cos) + 1e-6)
         cos_m = math.cos(self.margin)
         sin_m = math.sin(self.margin)
...
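Note: with this change the angular margin is applied only in training mode; at eval time the head returns the raw cosine-similarity matrix, which is what the new Eval CELoss (next hunk) consumes. A minimal self-contained sketch of the resulting forward logic; everything outside the lines shown in the diff, including the class and parameter names, is an assumption for illustration:

```python
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class ArcMarginSketch(nn.Layer):
    """Simplified ArcMargin-style head: margin only during training."""

    def __init__(self, embedding_size, class_num, margin=0.5, scale=30.0):
        super().__init__()
        self.margin = margin
        self.scale = scale
        self.weight = self.create_parameter(
            shape=[embedding_size, class_num], dtype="float32")

    def forward(self, input, label=None):
        # L2-normalize embeddings and class weights so matmul gives cos(theta).
        input = F.normalize(input, axis=1)
        weight = F.normalize(self.weight, axis=0)
        cos = paddle.matmul(input, weight)
        if not self.training:
            # Eval: plain cosine similarity, no margin, label unused.
            return cos
        # Train: cos(theta + m) = cos*cos_m - sin*sin_m for the target class.
        sin = paddle.sqrt(1.0 - paddle.square(cos) + 1e-6)
        cos_m = math.cos(self.margin)
        sin_m = math.sin(self.margin)
        phi = cos * cos_m - sin * sin_m
        one_hot = F.one_hot(label.squeeze(-1), cos.shape[1])
        return self.scale * (one_hot * phi + (1.0 - one_hot) * cos)
```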
@@ -39,6 +39,9 @@ Loss:
     - TripletLossV2:
         weight: 1.0
         margin: 0.5
+  Eval:
+    - CELoss:
+        weight: 1.0
 Optimizer:
   name: Momentum
...
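This gives the retrieval config a separate loss list for evaluation: training keeps TripletLossV2, while evaluation scores the (now margin-free) logits with plain cross entropy. A hedged Python mirror of the section, assuming the usual dict-of-lists layout of these configs:

```python
# Python mirror of the YAML "Loss" section above (values copied from the diff).
loss_config = {
    "Train": [{"TripletLossV2": {"weight": 1.0, "margin": 0.5}}],
    "Eval": [{"CELoss": {"weight": 1.0}}],
}

# Training and evaluation would each build their own combined loss,
# e.g. with the build_loss shown further below:
#   train_loss_func = build_loss(loss_config["Train"])
#   eval_loss_func = build_loss(loss_config["Eval"])
```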
@@ -244,7 +244,10 @@ class Trainer(object):
             batch[0] = paddle.to_tensor(batch[0]).astype("float32")
             batch[1] = paddle.to_tensor(batch[1]).reshape([-1, 1])
             # image input
-            out = self.model(batch[0])
+            if self.is_rec:
+                out = self.model(batch[0], batch[1])
+            else:
+                out = self.model(batch[0])
             # calc build
             if loss_func is not None:
                 loss_dict = loss_func(out, batch[-1])
...
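The trainer now forwards labels to recognition ("rec") models, because margin-based heads like the ArcMargin layer above need the label to apply the margin during training. A rough sketch of the kind of model wrapper this implies; the names here are assumptions, not the repository's actual classes:

```python
import paddle.nn as nn


class RecModelSketch(nn.Layer):
    """Hypothetical backbone + margin-head wrapper, illustrating why the
    trainer passes labels to rec models: the margin head consumes them."""

    def __init__(self, backbone, head):
        super().__init__()
        self.backbone = backbone
        self.head = head  # e.g. the ArcMargin-style layer from the first hunk

    def forward(self, x, label=None):
        feat = self.backbone(x)
        logits = self.head(feat, label)  # label is ignored in eval mode
        return {"features": feat, "logits": logits}
```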
@@ -43,6 +43,6 @@ class CombinedLoss(nn.Layer):
 
 def build_loss(config):
-    module_class = CombinedLoss(config)
+    module_class = CombinedLoss(copy.deepcopy(config))
     logger.info("build loss {} success.".format(module_class))
     return module_class
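The deepcopy matters because builders in the CombinedLoss style typically consume their config destructively (e.g. popping each entry's weight), so building a second loss from the same config object, such as the new Eval loss, would otherwise fail; the real module also needs `import copy` at its top for this line to work. A small sketch of the hazard, with a hypothetical `build_combined` standing in for CombinedLoss:

```python
import copy


def build_combined(config):
    # Stand-in for CombinedLoss.__init__ (hypothetical): pops keys out of
    # each entry, mutating the dicts it was given.
    losses = []
    for item in config:
        name = list(item)[0]
        params = item[name]
        weight = params.pop("weight")  # destructive!
        losses.append((name, weight, params))
    return losses


cfg = [{"TripletLossV2": {"weight": 1.0, "margin": 0.5}}]
build_combined(copy.deepcopy(cfg))  # caller's cfg stays intact
build_combined(copy.deepcopy(cfg))  # safe to build again (e.g. for Eval)
# Without the deepcopy, the second call would raise KeyError: 'weight'.
```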
@@ -22,6 +22,7 @@ class Loss(object):
     """
     Loss
     """
+
     def __init__(self, class_dim=1000, epsilon=None):
         assert class_dim > 1, "class_dim=%d is not larger than 1" % (class_dim)
         self._class_dim = class_dim
@@ -35,22 +36,26 @@ class Loss(object):
     #do label_smoothing
     def _labelsmoothing(self, target):
         if target.shape[-1] != self._class_dim:
-            one_hot_target = F.one_hot(target, self._class_dim)  #do one hot (23,34,46) -> 3 * _class_dim
+            one_hot_target = F.one_hot(
+                target,
+                self._class_dim)  #do one hot (23,34,46) -> 3 * _class_dim
         else:
             one_hot_target = target
         #do label_smooth
-        soft_target = F.label_smooth(one_hot_target, epsilon=self._epsilon)  #(1 - epsilon) * input + epsilon / K
+        soft_target = F.label_smooth(
+            one_hot_target,
+            epsilon=self._epsilon)  #(1 - epsilon) * input + epsilon / K
         soft_target = paddle.reshape(soft_target, shape=[-1, self._class_dim])
         return soft_target
 
     def _crossentropy(self, input, target, use_pure_fp16=False):
         if self._label_smoothing:
             target = self._labelsmoothing(target)
             input = -F.log_softmax(input, axis=-1)  #softmax and do log
             cost = paddle.sum(target * input, axis=-1)  #sum
         else:
             cost = F.cross_entropy(input=input, label=target)
         if use_pure_fp16:
             avg_cost = paddle.sum(cost)
@@ -64,9 +69,10 @@ class Loss(object):
             (target + eps) / (input + eps)) * self._class_dim
         return cost
 
-    def _jsdiv(self, input, target):  #the input and target are the fc outputs; no softmax applied
+    def _jsdiv(self, input,
+               target):  #the input and target are the fc outputs; no softmax applied
         input = F.softmax(input)
         target = F.softmax(target)
         #two distributions
         cost = self._kldiv(input, target) + self._kldiv(target, input)
@@ -87,14 +93,19 @@ class CELoss(Loss):
         super(CELoss, self).__init__(class_dim, epsilon)
 
     def __call__(self, input, target, use_pure_fp16=False):
-        logits = input["logits"]
+        if type(input) is dict:
+            logits = input["logits"]
+        else:
+            logits = input
         cost = self._crossentropy(logits, target, use_pure_fp16)
         return {"CELoss": cost}
 
+
 class JSDivLoss(Loss):
     """
     JSDiv loss
     """
+
     def __init__(self, class_dim=1000, epsilon=None):
         super(JSDivLoss, self).__init__(class_dim, epsilon)
@@ -112,4 +123,3 @@ class KLDivLoss(paddle.nn.Layer):
         p = paddle.nn.functional.softmax(p)
         q = paddle.nn.functional.softmax(q)
         return -(p * paddle.log(q + 1e-8)).sum(1).mean()
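With the dict check in place, CELoss accepts either a rec model's output dict (keyed by "logits", as produced in the Trainer hunk above) or a bare logits tensor from a plain classifier. A quick usage sketch, assuming CELoss as defined in this file is in scope:

```python
import paddle

# Assumes CELoss from this module is importable.
loss_fn = CELoss(class_dim=10)

logits = paddle.rand([4, 10])
labels = paddle.randint(0, 10, [4, 1], dtype="int64")

out_tensor = loss_fn(logits, labels)            # plain classifier output
out_dict = loss_fn({"logits": logits}, labels)  # rec model output dict

# Both paths compute the same cross entropy.
assert paddle.allclose(out_tensor["CELoss"], out_dict["CELoss"])
```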