diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index b4d478f52ab38be605719c9f9dfa124dcd7b3240..a6a38f164a72216362ebb34268fd30264562c71b 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -180,7 +180,7 @@ class Lamb(Optimizer):
                  beta2=0.999,
                  eps=1e-6,
                  weight_decay=0.0,
-                 decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name):
+                 decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
         super(Lamb, self).__init__(start_learning_rate, params)
 
         if self.is_group:
diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index c83d13a56d07bd97429fc9bb332eda0762475695..ee71979f28d5fdfb3010b6512598763a18d703b4 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -194,8 +194,8 @@ def get_bprop_mul(self):
     mul_func = P.Mul()
 
     def bprop(x, y, out, dout):
-        bc_dx = mul_func(dout, y)
-        bc_dy = mul_func(dout, x)
+        bc_dx = mul_func(y, dout)
+        bc_dy = mul_func(x, dout)
         return binop_grad_common(x, y, bc_dx, bc_dy)
 
     return bprop
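For reviewers, a minimal sketch of what the `decay_filter` change fixes. `FakeParam` is a hypothetical stand-in for a MindSpore `Parameter` (only its `.name` attribute matters here); the two lambdas are copied from the patch. Lowercase or mixed-case parameter names slipped past the old case-sensitive checks and received weight decay; the `.lower()` comparison excludes them as intended.

```python
# Minimal sketch (plain Python, not MindSpore): FakeParam is a hypothetical
# stand-in for a Parameter; only the .name attribute is relevant here.
class FakeParam:
    def __init__(self, name):
        self.name = name

# Old default: case-sensitive substring checks.
old_filter = lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name
# New default: case-insensitive, as in the patch above.
new_filter = lambda x: ('layernorm' not in x.name.lower()
                        and 'bias' not in x.name.lower())

params = [FakeParam('dense.weight'),      # ordinary weight: decays in both versions
          FakeParam('layernorm.gamma'),   # lowercase name slipped past the old filter
          FakeParam('output.Bias')]       # mixed case slipped past the old filter

for p in params:
    print(f'{p.name:16} old: {old_filter(p)}  new: {new_filter(p)}')
# dense.weight     old: True  new: True
# layernorm.gamma  old: True  new: False  (now correctly excluded from decay)
# output.Bias      old: True  new: False  (now correctly excluded from decay)
```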
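And a hedged sanity check of the `Mul` bprop change. For `out = x * y` the gradients are `dx = dout * y` and `dy = dout * x`; since elementwise multiplication commutes, `mul_func(y, dout)` yields the same values as `mul_func(dout, y)`, so the swap reorders operands without changing the math. The sketch below uses NumPy in place of `P.Mul`, and `reduce_to_shape` is a hypothetical stand-in for the broadcast reduction that `binop_grad_common` performs, not MindSpore's actual implementation.

```python
import numpy as np

def reduce_to_shape(grad, shape):
    # Sum out broadcast dimensions so grad matches the input's shape
    # (the role binop_grad_common plays in the real bprop).
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    for axis, size in enumerate(shape):
        if size == 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad

x = np.random.randn(3, 1)      # broadcasts against y
y = np.random.randn(3, 4)
dout = np.ones((3, 4))         # upstream gradient of out = x * y

# Operand order as in the patched bprop; commutativity makes it
# numerically identical to the pre-patch mul_func(dout, y) order.
bc_dx = y * dout
bc_dy = x * dout
dx = reduce_to_shape(bc_dx, x.shape)
dy = reduce_to_shape(bc_dy, y.shape)
assert dx.shape == x.shape and dy.shape == y.shape
assert np.allclose(bc_dx, dout * y)   # order swap changes nothing numerically
```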