Commit f65913d6 authored by chenhaozhe

fix performance of bert

Parent 25b0037b
...
@@ -687,7 +687,7 @@ bool IsSameNode(const EquivPtr &equiv1, const EquivPtr &equiv2, const VarPtr &var_node) {
   MS_EXCEPTION_IF_NULL(equiv1_node);
   auto equiv2_node = GetAnfNodeByVar(equiv2, var_node);
   MS_EXCEPTION_IF_NULL(equiv2_node);
-  return equiv1_node == equiv2_node;
+  return *equiv1_node == *equiv2_node;
 }
 
 AnfNodePtr GetAnfNodeByVar(const EquivPtr &equiv, const VarPtr &var_node) {
...
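This change makes IsSameNode compare the resolved nodes by value rather than by pointer: the old `equiv1_node == equiv2_node` is true only when both vars resolved to the very same object, while the new `*equiv1_node == *equiv2_node` dereferences the AnfNodePtrs and uses the node type's own operator==, so structurally equal nodes now also match. A minimal Python sketch of the distinction (the Node class below is hypothetical, not MindSpore code; `is` plays the role of the pointer comparison and `==` the role of the dereferenced comparison):

# Hypothetical Node class; only illustrates identity vs. value equality,
# mirroring the semantics of the C++ change above.
class Node:
    def __init__(self, op, inputs):
        self.op = op
        self.inputs = tuple(inputs)

    def __eq__(self, other):
        # Value equality: same operator and same inputs.
        return isinstance(other, Node) and (self.op, self.inputs) == (other.op, other.inputs)

    def __hash__(self):
        return hash((self.op, self.inputs))

a = Node("Mul", ("x", "y"))
b = Node("Mul", ("x", "y"))

print(a is b)  # False: two distinct objects, like comparing raw pointers
print(a == b)  # True: structurally equal, like comparing dereferenced nodes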
...
@@ -180,7 +180,7 @@ class Lamb(Optimizer):
                  beta2=0.999,
                  eps=1e-6,
                  weight_decay=0.0,
-                 decay_filter=lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name):
+                 decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
         super(Lamb, self).__init__(start_learning_rate, params)
         if self.is_group:
...
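The decay_filter default decides which parameters the Lamb optimizer exempts from weight decay. The old filter matched 'LayerNorm' and 'bias' case-sensitively, so differently cased names (e.g. 'layernorm.gamma' or 'dense.Bias') slipped through and wrongly received weight decay; lower-casing the name first makes the exemption case-insensitive. A small sketch of the difference, using a hypothetical Param stand-in with only a name attribute:

# Hypothetical stand-in for a framework parameter; only .name matters here.
class Param:
    def __init__(self, name):
        self.name = name

old_filter = lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name
new_filter = lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()

params = [Param('bert.layernorm.gamma'), Param('dense.Bias'), Param('dense.weight')]
for p in params:
    # new_filter returns False for any LayerNorm/bias variant, so those
    # parameters are excluded from weight decay regardless of casing.
    print(p.name, old_filter(p), new_filter(p))
# bert.layernorm.gamma  True  False  (old filter wrongly applied decay)
# dense.Bias            True  False  (old filter wrongly applied decay)
# dense.weight          True  True   (weight decay applied, as intended)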
...
@@ -191,8 +191,8 @@ def get_bprop_mul(self):
     mul_func = P.Mul()
 
     def bprop(x, y, out, dout):
-        bc_dx = mul_func(dout, y)
-        bc_dy = mul_func(dout, x)
+        bc_dx = mul_func(y, dout)
+        bc_dy = mul_func(x, dout)
         return binop_grad_common(x, y, bc_dx, bc_dy)
 
     return bprop
...
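Since multiplication is commutative, swapping the operand order in bprop leaves the gradients of z = x * y unchanged: dL/dx = dout * y and dL/dy = dout * x either way. Given the commit title, the reordering is presumably a performance tweak (operand order can influence backend kernel selection or fusion) rather than a semantic fix; that reading is an inference, not stated in the diff. A quick NumPy check, standing in for P.Mul, that both orders produce identical gradients:

import numpy as np

# NumPy elementwise multiply stands in for P.Mul; broadcasting is analogous.
rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3))
y = rng.standard_normal((2, 3))
dout = rng.standard_normal((2, 3))

# Gradient of z = x * y: dz/dx = y and dz/dy = x, each scaled by dout.
bc_dx_old, bc_dy_old = dout * y, dout * x   # operand order before the change
bc_dx_new, bc_dy_new = y * dout, x * dout   # operand order after the change

assert np.allclose(bc_dx_old, bc_dx_new)
assert np.allclose(bc_dy_old, bc_dy_new)
print("gradients identical; only the operand order (a backend detail) changed")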