Commit fc79997d authored by mindspore-ci-bot, committed by Gitee

!5502 Mod SoftmaxCrossEntropyWithlogits

Merge pull request !5502 from wanyiming/mod_SoftmaxCrossEntropyWithlogits
@@ -213,13 +213,9 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
     of entry is a valid one.

     Args:
-        is_grad (bool): Specifies whether calculate grad only. Default: True.
         sparse (bool): Specifies whether labels use sparse format or not. Default: False.
         reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
             If "none", do not perform reduction. Default: "none".
-        smooth_factor (float): Label smoothing factor. It is a optional input which should be in range [0, 1].
-            Default: 0.
-        num_classes (int): The number of classes in the task. It is a optional input Default: 2.

     Inputs:
         - **logits** (Tensor) - Tensor of shape (N, C).
@@ -238,29 +234,22 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         >>> loss(logits, labels)
     """
     def __init__(self,
-                 is_grad=True,
                  sparse=False,
-                 reduction='none',
-                 smooth_factor=0,
-                 num_classes=2):
+                 reduction='none'):
         super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction)
-        self.is_grad = is_grad
         self.sparse = sparse
-        validator.check_number_range(
-            "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name)
-        self.smooth_factor = smooth_factor
-        self.num_classes = num_classes
+        self.reduction = reduction
         self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits()
         self.one_hot = P.OneHot()
-        self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32)
-        self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32)
+        self.on_value = Tensor(1.0, mstype.float32)
+        self.off_value = Tensor(0., mstype.float32)
         self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"]
         if self.is_cpugpu:
-            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits(is_grad=self.is_grad)
+            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits()

     def construct(self, logits, labels):
-        if self.is_cpugpu and self.sparse:
+        if self.is_cpugpu and self.sparse and self.reduction == 'mean':
             x = self.sparse_softmax_cross_entropy(logits, labels)
             return x
...
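For downstream callers, the change above means nn.SoftmaxCrossEntropyWithLogits no longer takes a gradient flag or smoothing arguments. A minimal before/after sketch (hypothetical caller code, not part of this commit):

# Hypothetical migration example for callers of nn.SoftmaxCrossEntropyWithLogits.
import mindspore.nn as nn

# Before !5502 (removed signature):
#   loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True,
#                                           reduction="mean", smooth_factor=0.1,
#                                           num_classes=1000)

# After !5502: only sparse/reduction remain; gradients come from autodiff,
# and label smoothing moves to the new CrossEntropySmooth cell added below.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")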
@@ -115,7 +115,7 @@ class UncertaintyEvaluation:
             self.epi_uncer_model = EpistemicUncertaintyModel(self.epi_model)
             if self.epi_uncer_model.drop_count == 0:
                 if self.task_type == 'classification':
-                    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+                    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
                     net_opt = Adam(self.epi_uncer_model.trainable_params())
                     model = Model(self.epi_uncer_model, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
                 else:
@@ -314,7 +314,7 @@ class AleatoricLoss(Cell):
             self.exp = P.Exp()
             self.normal = C.normal
             self.to_tensor = P.ScalarToArray()
-            self.entropy = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+            self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         else:
             self.mean = P.ReduceMean()
             self.exp = P.Exp()
...
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
...
@@ -47,7 +47,7 @@ if __name__ == "__main__":
     ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, 1)
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
     opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
...
@@ -41,7 +41,7 @@ if __name__ == '__main__':
     net = GoogleNet(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
     if device_target == "Ascend":
...
@@ -102,7 +102,7 @@ if __name__ == '__main__':
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     if device_target == "Ascend":
         model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
...
@@ -46,7 +46,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
...
@@ -50,7 +50,7 @@ if __name__ == "__main__":
                              cfg.batch_size)
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
...
@@ -51,7 +51,7 @@ if __name__ == "__main__":
                                             per_channel=[True, False])
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
...
@@ -60,7 +60,7 @@ if __name__ == "__main__":
                                             symmetric=[False, False])
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
...
@@ -51,8 +51,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     if args_opt.device_target == "Ascend":
         net.to_float(mstype.float16)
...
@@ -173,7 +173,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth,
                                            num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
@@ -237,8 +237,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
                              config=config_ascend,
...
@@ -53,7 +53,7 @@ if __name__ == '__main__':
     # convert fusion network to quantization aware network
     network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
     # define network loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
...
@@ -90,7 +90,7 @@ def train_on_ascend():
     if config.label_smooth > 0:
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
@@ -151,7 +151,7 @@ def train_on_gpu():
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
...
@@ -41,8 +41,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net = mobilenet_v3_large(num_classes=config.num_classes)
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
...
@@ -163,8 +163,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
...
@@ -22,6 +22,7 @@ from mindspore import dataset as de
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from src.CrossEntropySmooth import CrossEntropySmooth

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -79,8 +80,8 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction='mean',
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
...
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""define loss function for network"""
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import functional as F
from mindspore.ops import operations as P


class CrossEntropySmooth(_Loss):
    """CrossEntropy"""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = P.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, label)
        return loss
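A short usage sketch of the new cell (illustrative values, not part of the commit): with smooth_factor=0.1 and num_classes=10, OneHot builds targets with on_value 0.9 and off_value 0.1/9, so each smoothed target row still sums to 1.

# Illustrative check of the smoothed targets CrossEntropySmooth feeds to
# nn.SoftmaxCrossEntropyWithLogits; smooth_factor/num_classes are example values.
import numpy as np
from mindspore import Tensor

smooth_factor, num_classes = 0.1, 10
on_value = 1.0 - smooth_factor                 # 0.9
off_value = smooth_factor / (num_classes - 1)  # ~0.0111
row = np.full(num_classes, off_value, dtype=np.float32)
row[3] = on_value                              # hard label index 3
assert abs(row.sum() - 1.0) < 1e-6             # still a probability distribution

loss_fn = CrossEntropySmooth(sparse=True, reduction='mean',
                             smooth_factor=smooth_factor, num_classes=num_classes)
logits = Tensor(np.random.randn(2, num_classes).astype(np.float32))
labels = Tensor(np.array([3, 7]).astype(np.int32))
print(loss_fn(logits, labels))  # scalar mean loss over the batch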
@@ -33,6 +33,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from src.lr_generator import get_lr, warmup_cosine_annealing_lr
+from src.CrossEntropySmooth import CrossEntropySmooth

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -147,8 +148,8 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
@@ -159,11 +160,10 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                             num_classes=config.class_num)
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     if args_opt.net == "resnet101" or args_opt.net == "resnet50":
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay,
...
@@ -134,7 +134,7 @@ def test(cloud_args=None):
     net = vgg16(num_classes=args.num_classes, args=args)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                    weight_decay=args.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
     param_dict = load_checkpoint(args.pre_trained)
...
@@ -211,7 +211,7 @@ if __name__ == '__main__':
                        loss_scale=args.loss_scale)
     if args.dataset == "cifar10":
-        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                       amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
     else:
...
@@ -64,7 +64,7 @@ if __name__ == '__main__':
                           weight=Tensor(embedding_table),
                           batch_size=cfg.batch_size)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
...
@@ -70,7 +70,7 @@ if __name__ == '__main__':
     if args.pre_trained:
         load_param_into_net(network, load_checkpoint(args.pre_trained))
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()
...
@@ -39,7 +39,7 @@ class MsWrapper(nn.Cell):
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                       filter(lambda x: x.requires_grad, net.get_parameters()))
     context.set_context(mode=context.GRAPH_MODE)
...
@@ -66,7 +66,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -85,7 +85,7 @@ def test_lenet_nccl():
     learning_rate = multisteplr(epoch, 2)
     momentum = 0.9
     mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, mom_optimizer)
     train_network.set_train()
...
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""define loss function for network"""
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import functional as F
from mindspore.ops import operations as P


class CrossEntropySmooth(_Loss):
    """CrossEntropy"""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super(CrossEntropySmooth, self).__init__()
        self.onehot = P.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
        loss = self.ce(logit, label)
        return loss
@@ -36,12 +36,12 @@ from tests.st.networks.models.resnet50.src.dataset import create_dataset
 from tests.st.networks.models.resnet50.src.lr_generator import get_learning_rate
 from tests.st.networks.models.resnet50.src.config import config
 from tests.st.networks.models.resnet50.src.metric import DistAccuracy, ClassifyCorrectCell
+from tests.st.networks.models.resnet50.src.CrossEntropySmooth import CrossEntropySmooth
 from tests.st.networks.models.resnet50.src_thor.config import config as thor_config
 from tests.st.networks.models.resnet50.src_thor.model_thor import Model as THOR_Model
 from tests.st.networks.models.resnet50.src_thor.resnet import resnet50 as resnet50_thor
 from tests.st.networks.models.resnet50.src_thor.thor import THOR

 MINDSPORE_HCCL_CONFIG_PATH = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_1.json"
 MINDSPORE_HCCL_CONFIG_PATH_2 = "/home/workspace/mindspore_config/hccl/rank_tabel_4p/rank_table_4p_2.json"
 dataset_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train"
@@ -151,8 +151,8 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl):
         config.label_smooth_factor = 0.0
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
-                                            num_classes=config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
+                              num_classes=config.class_num)
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
@@ -260,9 +260,8 @@ def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
         thor_config.label_smooth_factor = 0.0
     # loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                            smooth_factor=thor_config.label_smooth_factor,
-                                            num_classes=thor_config.class_num)
+    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=thor_config.label_smooth_factor,
+                              num_classes=thor_config.class_num)
     # train dataset
     dataset = create_dataset(dataset_path=dataset_path, do_train=True,
...
@@ -60,7 +60,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -76,7 +76,7 @@ def test_trainTensor(num_classes=10, epoch=15, batch_size=32):
     lr = 0.1
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum, weight_decay=0.0001)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
...
@@ -136,7 +136,7 @@ def test_train_lenet():
     learning_rate = multisteplr(epoch, 30)
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
@@ -192,7 +192,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 def test_train_and_eval_lenet():
     context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
     network = LeNet5(10)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
...
@@ -129,7 +129,7 @@ def test_LSTM():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -337,7 +337,7 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad,
                                 net.get_parameters()), lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -361,7 +361,7 @@ def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad,
                                 net.get_parameters()), lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)  # optimizer
@@ -385,7 +385,7 @@ def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad,
                                 net.get_parameters()), lr, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     train_network = amp.build_train_network(
         net, optimizer, criterion, level="O2")
     train_network.set_train()
...
@@ -39,7 +39,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -49,7 +49,7 @@ def test_adam():
     net = NetAdam()
     optimizer = Adam(filter(lambda x: x.requires_grad,
                             net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
...
@@ -49,7 +49,7 @@ def test_ftrl():
     net = NetFtrl()
     optimizer = FTRL(filter(lambda x: x.requires_grad,
                             net.get_parameters()), learning_rate=0.01)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(
         net_with_criterion, optimizer)
...
@@ -52,7 +52,7 @@ def test_momentum():
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -55,7 +55,7 @@ def test_SGD():
     optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum, dampening,
                     weight_decay, nesterov, loss_scale)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
...
@@ -20,15 +20,13 @@ import mindspore.context as context
 import mindspore.nn as nn
 from mindspore import Tensor


 class NetSparseSoftmaxCrossEntropyWithLogits(nn.Cell):
     def __init__(self):
         super(NetSparseSoftmaxCrossEntropyWithLogits, self).__init__()
-        self.loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
-        self.dlogits = nn.SoftmaxCrossEntropyWithLogits(is_grad=True, sparse=True)
+        self.loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)

     def construct(self, logits, labels):
-        return (self.loss(logits, labels), self.dlogits(logits, labels))
+        return self.loss(logits, labels)


 @pytest.mark.level0
@@ -39,29 +37,18 @@ def test_sparse_softmax_cross_entropy_with_logits():
                               [1, 10, 1],
                               [10, 1, 1]]).astype(np.float32))
     labels = Tensor(np.array([2, 1, 0]).astype(np.int32))
-    expect_loss = 0.0002467
-    expect_dlogits = np.array([[4.1126452e-05, 4.1126452e-05, -8.2234539e-05],
-                               [4.1126452e-05, -8.2234539e-05, 4.1126452e-05],
-                               [-8.2234539e-05, 4.1126452e-05, 4.1126452e-05]]).astype(np.float32)
+    expect_loss = [0.00024673, 0.00024673, 0.00024673]

     context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)

     context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
     sparse_softmax_cross_entropy_with_logits = NetSparseSoftmaxCrossEntropyWithLogits()
     output = sparse_softmax_cross_entropy_with_logits(logits, labels)
     error0 = 1.0e-6
-    diff0 = output[0].asnumpy() - expect_loss
+    diff0 = output.asnumpy() - expect_loss
     assert np.all(abs(diff0) < error0)
-    error1 = np.ones(shape=[3, 3]) * 1.0e-6
-    diff1 = output[1].asnumpy() - expect_dlogits
-    assert np.all(abs(diff1) < error1)
...
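The updated expectation is easy to reproduce outside MindSpore. A standalone numpy sketch of the same per-sample cross entropy (in each row the correct class holds the large logit, so -log softmax = log(1 + 2e/e^10) ≈ 0.00024673):

# Standalone numpy recomputation of the test's expected per-sample loss.
import numpy as np

logits = np.array([[1, 1, 10], [1, 10, 1], [10, 1, 1]], dtype=np.float32)
labels = np.array([2, 1, 0])
shifted = logits - logits.max(axis=1, keepdims=True)  # for numerical stability
log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
per_sample = -log_softmax[np.arange(len(labels)), labels]
print(per_sample)  # ~[0.00024673 0.00024673 0.00024673]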
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 if __name__ == "__main__":
     network = BNNLeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
     net_with_loss = bnn_layers.WithBNNLossCell(network, criterion, 60000, 0.000001)
...
@@ -125,7 +125,7 @@ def validate_model(net, dataset):
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
     net_with_loss = WithLossCell(network, criterion)
...
@@ -124,7 +124,7 @@ def validate_model(net, dataset):
 if __name__ == "__main__":
     network = LeNet5()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     optimizer = nn.AdamWeightDecay(params=network.trainable_params(), learning_rate=0.0001)
     net_with_loss = WithLossCell(network, criterion)
...
@@ -73,9 +73,7 @@ def do_sparse_embedding(ps=False):
     optimizer = Adam(filter(lambda x: x.requires_grad, net.get_parameters()))
     optimizer.sparse_opt.add_prim_attr("primitive_target", "CPU")
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)
     train_network.set_train()
...
@@ -123,7 +123,7 @@ def create_dataset(data_path, batch_size=32, repeat_size=1,
 if __name__ == "__main__":
     network = LeNet5(10)
     network.set_param_ps()
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
...
@@ -94,9 +94,7 @@ if __name__ == "__main__":
     np.random.seed(0)
     network = LeNet5(10)
     network.set_param_ps()
-    criterion = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction="mean"
-    )
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), 0.01, 0.9)
     if device_target == "GPU":
         context.set_auto_parallel_context(parallel_mode="data_parallel", mirror_mean=True, device_num=get_group_size())
...
@@ -159,7 +159,7 @@ def test_pynative_lenet_train_hook_function_print_and_save_grad():
                 cell_hook_function_print_grad)
     net = LeNet5(hook_function=function[0], cell_hook_function=function[1])
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = GradWrap(net_with_criterion)
     train_network.set_train()
...
@@ -145,14 +145,14 @@ def test_multi_grads():
     net = LeNet()

     # grad operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     with_loss_cell = WithLossCell(net, loss_fn)
     grad_all = GradWrapWithLoss(with_loss_cell)
     grad_out = grad_all(Tensor(inputs_np), Tensor(labels_np)).asnumpy()
     assert np.any(grad_out != 0), 'grad result can not be all zeros'

     # train-one-step operation
-    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=sparse)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=sparse)
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                          0.01, 0.9)
     loss_net = WithLossCell(net, loss_fn)
...
@@ -42,7 +42,7 @@ def train_lenet():
                               cfg.batch_size)
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
@@ -74,7 +74,7 @@ def train_lenet_quant():
                                             symmetric=[False, False])
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
@@ -104,7 +104,7 @@ def eval_quant():
                                             per_channel=[True, False])
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
...
@@ -154,7 +154,7 @@ class TestSummary:
     def _run_network(self, dataset_sink_mode=True):
         lenet = LeNet5()
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         optim = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
         model = Model(lenet, loss_fn=loss, optimizer=optim, metrics={'acc': Accuracy()})
         summary_dir = tempfile.mkdtemp(dir=self.base_summary_dir)
...
@@ -31,7 +31,7 @@ def lr_gen(fn, epoch_size):
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr_gen(lambda i: 0.1, epoch_size), 0.9,
                    0.01, 1024)
     Model(net, loss, opt)
...
@@ -78,7 +78,7 @@ def lr_gen(fn, epoch_size):
 def me_train_tensor(net, input_np, label_np, epoch_size=2):
     """me_train_tensor"""
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # reorder the net parameters , leave the parameters that need to be passed into lars to the end part
     opt = Momentum(get_net_trainable_reordered_params(net)[2], lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
...
@@ -114,7 +114,7 @@ def train_common(net):
label = Tensor(np.ones([batch_size]), dtype=ms.int32)
dataset = Dataset(predict, label, 2)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, loss, opt)
...
@@ -79,7 +79,7 @@ def all_to_all_common(strategy1):
dataset = Dataset(predict, label, 2)
net = all_to_all_net(strategy1)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True)
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
loss.one_hot.set_strategy(((8, 1), (), ()))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
...
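Note: the parallel tests shard the loss cell's internal operators directly. On my reading of set_strategy, each argument is one tuple per operator input, whose entries give the number of partitions per dimension of that input; a sketch restating the calls above under that interpretation:

from mindspore.nn import SoftmaxCrossEntropyWithLogits

loss = SoftmaxCrossEntropyWithLogits(sparse=True)
# logits (N, C) and one-hot labels (N, C): batch dimension split 8 ways.
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
# one_hot inputs: indices (N,) split 8 ways; on/off values are scalars.
loss.one_hot.set_strategy(((8, 1), (), ()))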
@@ -134,7 +134,7 @@ def test_batchnorm_batch_parallel():
dataset = DatasetLenet(predict, label, 2)
net = batchnorm_net(num_classes)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
...
@@ -209,7 +209,7 @@ def bn_common(parallel_mode, train_flag, strategy_loss=None):
dataset = Dataset(predict, label, 2)
net = bn_net()
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(strategy_loss)
opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001, 1024 * rank_size)
...
@@ -80,7 +80,7 @@ def loss_scale_manager_common(strategy1):
dataset = Dataset(predict, label, 2)
net = all_to_all_net(strategy1)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
scale_manager = DynamicLossScaleManager(32, 2, 2000)
...
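Note: the loss-scale test above builds a DynamicLossScaleManager with bare positionals. Assuming the usual (init_loss_scale, scale_factor, scale_window) order, the construction reads as follows (net, loss and opt as in the hunk):

from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore.train.model import Model

# Start at a loss scale of 32; divide by 2 on overflow; multiply by 2
# after 2000 consecutive overflow-free steps.
scale_manager = DynamicLossScaleManager(32, 2, 2000)
model = Model(net, loss, opt, loss_scale_manager=scale_manager)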
@@ -76,7 +76,7 @@ def all_to_all_common(strategy1):
dataset = Dataset(predict, label, 2)
net = all_to_all_net(strategy1)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
loss.one_hot.set_strategy(((8, 1), (), ()))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
...
@@ -82,7 +82,7 @@ def all_to_all_common():
dataset = Dataset(predict, label, 2)
net = all_to_all_net()
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, loss, opt)
...
@@ -362,7 +362,7 @@ def test_resnet_operator_batch_parallel():
dataset = DatasetLenet(predict, label, 2)
net = resnet_operator_net(num_classes)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
@@ -387,7 +387,7 @@ def test_resnet_model_parallel():
dataset = DatasetLenet(predict, label, 2)
net = resnet_model_parallel_net(num_classes)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
...
@@ -108,7 +108,7 @@ def reshape_common(parallel_mode):
dataset = Dataset(predict, label, 2)
net = prelu_net()
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = Momentum(net.trainable_params(), learning_rate, momentum)
model = Model(net, loss, opt)
model.train(epoch_size, dataset, dataset_sink_mode=False)
...
@@ -95,7 +95,7 @@ def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss
dataset = Dataset(predict, label, 2)
net = reshape_net(strategy0, strategy1, strategy2)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(strategy_loss)
loss.one_hot.set_strategy(((8, 1), (), ()))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
...
@@ -80,7 +80,7 @@ def transpose_common(strategy1, strategy2):
dataset = Dataset(predict, label, 2)
net = transpose_net(strategy1, strategy2)
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
opt = Momentum(net.trainable_params(), learning_rate, momentum)
context.set_context(mode=context.GRAPH_MODE)
...
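Note: the transpose test pins execution to graph mode. For context, a minimal sketch of the mode switch:

from mindspore import context

# GRAPH_MODE compiles the network's construct() into a whole graph before
# running it; the alternative, PYNATIVE_MODE, executes operators eagerly.
context.set_context(mode=context.GRAPH_MODE)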
@@ -141,7 +141,7 @@ class GradWrap(nn.Cell):
def test_hook():
net = LeNet5()
optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
- criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
+ criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
net_with_criterion = WithLossCell(net, criterion)
train_network = GradWrap(net_with_criterion)
train_network.set_train()
...
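Note: the hook test wraps net and criterion in WithLossCell, then in the GradWrap cell named in the hunk header. GradWrap's body is not part of this diff; below is a hypothetical reconstruction of such a wrapper, assuming the GradOperation composite API:

import mindspore.nn as nn
from mindspore import ParameterTuple
from mindspore.ops import composite as C

class GradWrap(nn.Cell):
    """Hypothetical sketch: returns the gradients of the wrapped loss
    network with respect to its trainable parameters."""

    def __init__(self, network):
        super(GradWrap, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation(get_by_list=True)

    def construct(self, x, label):
        return self.grad(self.network, self.weights)(x, label)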
@@ -129,7 +129,7 @@ def test_lenet_grad():
verification_step = 0
net = LeNet5()
- loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False)
+ loss = nn.SoftmaxCrossEntropyWithLogits()
momen_opti = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
train_net = GradWrap(NetWithLossClass(net))
train_net.set_train()
...
@@ -283,7 +283,7 @@ def test_load_param_into_net():
def test_save_checkpoint_for_network():
"""test save_checkpoint for network"""
net = Net()
- loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+ loss = SoftmaxCrossEntropyWithLogits(sparse=True)
opt = Momentum(net.trainable_params(), 0.0, 0.9, 0.0001, 1024)
loss_net = WithLossCell(net, loss)
...
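Note: the checkpoint test above pairs with the test_load_param_into_net named in the hunk header. A hedged sketch of the save/restore round trip these tests exercise (exact save_checkpoint signatures have shifted across MindSpore versions; net as in the hunk):

from mindspore.train.serialization import save_checkpoint, load_checkpoint, load_param_into_net

save_checkpoint(net, "net.ckpt")           # persist the network's parameters
param_dict = load_checkpoint("net.ckpt")   # read back a name -> Parameter dict
load_param_into_net(net, param_dict)       # restore them into the network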