diff --git a/dygraph/cvlibs/param_init.py b/dygraph/cvlibs/param_init.py
index 16793aaef98b895016da340eac76e99501b39adb..567399c0a0c7d2310931b1c0ccae13cd0d5422b1 100644
--- a/dygraph/cvlibs/param_init.py
+++ b/dygraph/cvlibs/param_init.py
@@ -15,11 +15,11 @@
 import paddle.fluid as fluid
 
 
-def constant_init(param, value=0.0):
-    initializer = fluid.initializer.Constant(value)
+def constant_init(param, **kwargs):
+    initializer = fluid.initializer.Constant(**kwargs)
     initializer(param, param.block)
 
 
-def normal_init(param, loc=0.0, scale=1.0, seed=0):
-    initializer = fluid.initializer.Normal(loc=loc, scale=scale, seed=seed)
+def normal_init(param, **kwargs):
+    initializer = fluid.initializer.Normal(**kwargs)
     initializer(param, param.block)
diff --git a/dygraph/models/architectures/hrnet.py b/dygraph/models/architectures/hrnet.py
index 821afd1b617b4b4fa89914434e5ee0e2044a43f0..ea3db3b15f4bb2235761a3dea87ad3f49af3bbc9 100644
--- a/dygraph/models/architectures/hrnet.py
+++ b/dygraph/models/architectures/hrnet.py
@@ -146,7 +146,8 @@ class HRNet(fluid.dygraph.Layer):
             has_se=self.has_se,
             name="st4")
 
-        self.init_weight(backbone_pretrained)
+        if self.training:
+            self.init_weight(backbone_pretrained)
 
     def forward(self, x, label=None, mode='train'):
         input_shape = x.shape[2:]
diff --git a/dygraph/models/fcn.py b/dygraph/models/fcn.py
index f6179aebc544b06ea529c073adfa99151bb77b1a..a852cff88bc6062c154c8c9df0d5c99b604f0abf 100644
--- a/dygraph/models/fcn.py
+++ b/dygraph/models/fcn.py
@@ -86,7 +86,8 @@ class FCN(fluid.dygraph.Layer):
             filter_size=1,
             stride=1,
             padding=0)
-        self.init_weight(model_pretrained)
+        if self.training:
+            self.init_weight(model_pretrained)
 
     def forward(self, x):
         input_shape = x.shape[2:]
@@ -132,36 +133,6 @@ class FCN(fluid.dygraph.Layer):
             raise Exception('Pretrained model is not found: {}'.format(
                 pretrained_model))
 
-    # def _get_loss(self, logit, label):
-    #     """
-    #     compute forward loss of the model
-
-    #     Args:
-    #         logit (tensor): the logit of model output
-    #         label (tensor): ground truth
-
-    #     Returns:
-    #         avg_loss (tensor): forward loss
-    #     """
-    #     logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
-    #     label = fluid.layers.transpose(label, [0, 2, 3, 1])
-    #     mask = label != self.ignore_index
-    #     mask = fluid.layers.cast(mask, 'float32')
-    #     loss, probs = fluid.layers.softmax_with_cross_entropy(
-    #         logit,
-    #         label,
-    #         ignore_index=self.ignore_index,
-    #         return_softmax=True,
-    #         axis=-1)
-
-    #     loss = loss * mask
-    #     avg_loss = fluid.layers.mean(loss) / (
-    #         fluid.layers.mean(mask) + self.EPS)
-
-    #     label.stop_gradient = True
-    #     mask.stop_gradient = True
-    #     return avg_loss
-
 class ConvBNLayer(fluid.dygraph.Layer):
     def __init__(self,
diff --git a/dygraph/models/losses/cross_entroy_loss.py b/dygraph/models/losses/cross_entroy_loss.py
index ca18ea4247aa3d5821c1ab428c2ee4f9d031b850..d37116d186556b1c9e3a3e2d71ebca84653348ca 100644
--- a/dygraph/models/losses/cross_entroy_loss.py
+++ b/dygraph/models/losses/cross_entroy_loss.py
@@ -17,8 +17,7 @@
 from paddle import nn
 import paddle.nn.functional as F
 from dygraph.cvlibs import manager
-
-
+'''
 @manager.LOSSES.add_component
 class CrossEntropyLoss(nn.CrossEntropyLoss):
     """
@@ -40,8 +39,9 @@ class CrossEntropyLoss(nn.CrossEntropyLoss):
     """
 
     def __init__(self, weight=None, ignore_index=255, reduction='mean'):
-        super(CrossEntropyLoss, self).__init__(
-            weight=weight, ignore_index=ignore_index, reduction=reduction)
+        self.weight = weight
+        self.ignore_index = ignore_index
+        self.reduction = reduction
         self.EPS = 1e-5
         if self.reduction not in ['sum', 'mean', 'none']:
             raise ValueError(
@@ -71,6 +71,49 @@ class CrossEntropyLoss(nn.CrossEntropyLoss):
         mask = paddle.cast(mask, 'float32')
         avg_loss = loss / (paddle.mean(mask) + self.EPS)
 
+        label.stop_gradient = True
+        mask.stop_gradient = True
+        return avg_loss
+'''
+
+
+@manager.LOSSES.add_component
+class CrossEntropyLoss(nn.Layer):
+    """
+    Implements the cross entropy loss function.
+
+    Args:
+        ignore_index (int64): Specifies a target value that is ignored
+            and does not contribute to the input gradient. Default ``255``.
+    """
+
+    def __init__(self, ignore_index=255):
+        super(CrossEntropyLoss, self).__init__()
+        self.ignore_index = ignore_index
+        self.EPS = 1e-5
+
+    def forward(self, logit, label):
+        """
+        Forward computation.
+        Args:
+            logit (Tensor): logit tensor, the data type is float32, float64. Shape is
+                (N, C), where C is number of classes, and if shape is more than 2D, this
+                is (N, C, D1, D2,..., Dk), k >= 1.
+            label (Variable): label tensor, the data type is int64. Shape is (N), where each
+                value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
+                (N, D1, D2,..., Dk), k >= 1.
+        """
+        if len(label.shape) != len(logit.shape):
+            label = paddle.unsqueeze(label, 1)
+
+        loss = F.softmax_with_cross_entropy(
+            logit, label, ignore_index=self.ignore_index, axis=1)
+        loss = paddle.reduce_mean(loss)
+
+        mask = label != self.ignore_index
+        mask = paddle.cast(mask, 'float32')
+        avg_loss = loss / (paddle.mean(mask) + self.EPS)
+
         label.stop_gradient = True
         mask.stop_gradient = True
         return avg_loss
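
Taken together, the patch makes the initializer helpers forward arbitrary keyword arguments, defers pretrained-weight loading to training mode, and replaces the subclassed loss with a plain nn.Layer. Below is a minimal smoke-test sketch of the resulting API; it is not part of the patch, and the Linear layer, tensor shapes, and 19-class setup are illustrative assumptions for the transitional fluid/paddle.nn mix this branch targets (run from the repo root so the dygraph package is importable).

# Smoke test (illustrative only): exercises the kwargs-based
# initializers and the rewritten CrossEntropyLoss from this patch.
import numpy as np
import paddle
import paddle.fluid as fluid

from dygraph.cvlibs import param_init
from dygraph.models.losses.cross_entroy_loss import CrossEntropyLoss

with fluid.dygraph.guard():
    # constant_init/normal_init now forward **kwargs verbatim, so call
    # sites can pass any argument that fluid.initializer.Constant or
    # fluid.initializer.Normal accepts.
    fc = fluid.dygraph.Linear(8, 4)
    param_init.normal_init(fc.weight, loc=0.0, scale=0.001)
    param_init.constant_init(fc.bias, value=0.0)

    # The new loss is a plain nn.Layer: logit is (N, C, H, W) float32,
    # label is (N, H, W) int64, and ignore_index=255 marks void pixels.
    loss_fn = CrossEntropyLoss(ignore_index=255)
    logit = fluid.dygraph.to_variable(
        np.random.rand(2, 19, 64, 64).astype('float32'))
    label = fluid.dygraph.to_variable(
        np.random.randint(0, 19, size=(2, 64, 64)).astype('int64'))
    avg_loss = loss_fn(logit, label)
    print(float(avg_loss.numpy()))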