diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 35961ae678f71e763e9b358bae0f47a26790967d..fb69c2cc363bf26cc0595ebbdc4188c3346c0b5c 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -52,6 +52,7 @@
 #include "pre_activate/ascend/ir_fusion/batchnorm_to_bninfer.h"
 #include "pre_activate/ascend/ir_fusion/batchnormgrad_to_bninfergrad.h"
 #include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h"
+#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h"
 #include "pre_activate/ascend/format_type/insert_trans_op.h"
 #include "pre_activate/pass/getitem_tuple.h"
 #include "pre_activate/pass/optimize_dependence.h"
@@ -114,7 +115,10 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
-  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared<SoftmaxGradExtFusion>());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ccb0cbfcb8788afb90291f5691faf4bb68600bb5
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.cc
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h"
+#include <memory>
+#include "session/anf_runtime_algorithm.h"
+#include "ir/primitive.h"
+#include "utils/utils.h"
+#include "pre_activate/common/helper.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef SoftmaxGradExtFusion::DefinePattern() const {
+  VectorRef mul({prim::kPrimMul, input1_, input0_});
+  VectorRef sum({sum_var_, mul});
+  VectorRef sub({prim::kPrimSub, input0_, sum});
+  VectorRef mul1({prim::kPrimMul, input2_, input1_});
+  VectorRef mul_grad({prim::kPrimMul, mul1, sub});
+  return mul_grad;
+}
+
+const AnfNodePtr SoftmaxGradExtFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
+                                               const EquivPtr &equiv) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(equiv);
+  MS_EXCEPTION_IF_NULL(node);
+  auto input0 = GetAnfNodeByVar(equiv, input0_);
+  auto input1 = GetAnfNodeByVar(equiv, input1_);
+  auto input2 = GetAnfNodeByVar(equiv, input2_);
+  auto sum = GetAnfNodeByVar(equiv, sum_var_);
+
+  auto prim = std::make_shared<Primitive>(kSoftmaxGradExtOpName);
+  auto fusion_node = graph->NewCNode({NewValueNode(prim), input0, input1, input2});
+  MS_EXCEPTION_IF_NULL(fusion_node);
+  fusion_node->set_scope(node->scope());
+  fusion_node->set_abstract(node->abstract());
+  AnfAlgo::CopyNodeAttr(kAttrKeepDims, sum, fusion_node);
+  AnfAlgo::CopyNodeAttr(kAttrAxis, sum, fusion_node);
+  return fusion_node;
+}
+} // namespace opt
+} // namespace mindspore
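Taken together, DefinePattern and Process replace the five-node subgraph Mul → ReduceSum → Sub → Mul → Mul with a single SoftmaxGradExt node that reuses the three original inputs and carries over the ReduceSum's keep_dims and axis attributes. As a reading aid, here is a minimal NumPy sketch of the computation the pattern matches; the function name is mine, and keep_dims=True is an assumption (it is what lets the following Sub broadcast):

```python
import numpy as np

def softmax_grad_ext_reference(input0, input1, input2, axes=(2, 3)):
    """NumPy mirror of the subgraph replaced by SoftmaxGradExt:
         mul      = Mul(input1, input0)
         sum      = ReduceSum(mul, axes)   # keep_dims assumed True
         sub      = Sub(input0, sum)
         mul1     = Mul(input2, input1)
         mul_grad = Mul(mul1, sub)
    """
    s = np.sum(input1 * input0, axis=axes, keepdims=True)
    return (input2 * input1) * (input0 - s)
```

Reading input0 as the incoming gradient dout, input1 as the softmax output y, and input2 as a scale, this evaluates to scale * y * (dout - sum(y * dout)), i.e. the standard softmax backward formula times a scale. That interpretation of the operands is inferred from the pattern; the patch itself does not state it.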
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h
new file mode 100644
index 0000000000000000000000000000000000000000..70c5658e6086f42413ece583261f700ed7818aa7
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SOFTMAX_GRAD_EXT_FUSION_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SOFTMAX_GRAD_EXT_FUSION_H_
+
+#include <memory>
+#include "pre_activate/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+class SoftmaxGradExtFusion : public PatternProcessPass {
+ public:
+  explicit SoftmaxGradExtFusion(bool multigraph = true) : PatternProcessPass("softmax_grad_ext_fusion", multigraph) {
+    input0_ = std::make_shared<Var>();
+    input1_ = std::make_shared<Var>();
+    input2_ = std::make_shared<Var>();
+    sum_var_ = std::make_shared<Var>(std::make_shared<Primitive>(prim::kPrimReduceSum->name()));
+  }
+  ~SoftmaxGradExtFusion() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  VarPtr input0_;
+  VarPtr input1_;
+  VarPtr input2_;
+  VarPtr sum_var_;
+};
+} // namespace opt
+} // namespace mindspore
+#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_SOFTMAX_GRAD_EXT_FUSION_H_
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 40930e50267f1a3f03007f2d6c2867ecdb390664..af6178b947b0ebbca50bebc6fbd36cdc52cf3417 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -151,6 +151,7 @@ constexpr auto kLarsV2OpName = "LarsV2";
 constexpr auto kLarsV2UpdateOpName = "LarsV2Update";
 constexpr auto kSquareSumAllOpName = "SquareSumAll";
 constexpr auto kNMSWithMaskOpName = "NMSWithMask";
+constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt";
 
 // attr key name
 constexpr auto kAttrInputNames = "input_names";
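Two details of the header are worth spelling out. First, sum_var_ is a Var constructed around a Primitive, so in the pattern `VectorRef sum({sum_var_, mul})` it sits in the operator position: any ReduceSum CNode matches there, and the matched node itself is bound to the var, which is what lets Process read keep_dims and axis off it. Second, those attributes are semantically load-bearing: the fused kernel must reproduce the same reduction, and keep_dims in particular decides whether the reduced tensor still broadcasts against its input in the following Sub. A small NumPy illustration of that second point (the shapes are my own example):

```python
import numpy as np

x = np.ones((2, 3, 4, 4), dtype=np.float32)
s_keep = x.sum(axis=(2, 3), keepdims=True)  # shape (2, 3, 1, 1): Sub(x, s_keep) broadcasts
s_drop = x.sum(axis=(2, 3))                 # shape (2, 3): Sub(x, s_drop) is a shape error
print((x - s_keep).shape)                   # (2, 3, 4, 4)
```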
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..25432336130956788222a1cfbde3107fee62627f
--- /dev/null
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion_test.cc
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "common/backend_common_test.h"
+#include "common/py_func_graph_fetcher.h"
+#include "pre_activate/common/optimizer.h"
+#include "pre_activate/ascend/ir_fusion/softmax_grad_ext_fusion.h"
+#include "debug/anf_ir_dump.h"
+
+namespace mindspore {
+namespace opt {
+class TestHWOptSoftmaxGradExtFusion : public BackendCommon {
+ public:
+  TestHWOptSoftmaxGradExtFusion() : get_py_fun_("gtest_input.pre_activate.softmax_grad_ext_fusion", true) {}
+  ~TestHWOptSoftmaxGradExtFusion() override = default;
+
+  UT::PyFuncGraphFetcher get_py_fun_;
+};
+
+TEST_F(TestHWOptSoftmaxGradExtFusion, test_fusion) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_softmax_grad_ext_fusion", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{1, 1, 1, 1};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 3; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto fg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  pm->AddPass(std::make_shared<opt::SoftmaxGradExtFusion>());
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(fg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_softmax_grad_ext_fusion", "after");
+  EXPECT_TRUE(CheckEqualGraph(g_after, new_graph));
+}
+} // namespace opt
+} // namespace mindspore
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/softmax_grad_ext_fusion.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/softmax_grad_ext_fusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbcc3d7480d738da391e3b940113d4e57379b209
--- /dev/null
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/softmax_grad_ext_fusion.py
@@ -0,0 +1,56 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+from mindspore.ops import Primitive
+from mindspore.ops import operations as P
+
+Mul = P.Mul()
+ReduceSum = P.ReduceSum()
+Sub = P.Sub()
+SoftmaxGradExt = Primitive('SoftmaxGradExt')
+MakeTuple = Primitive('make_tuple')
+TupleGetItem = Primitive('tuple_getitem')
+axes = (2, 3)
+
+
+class FnDict:
+    def __init__(self):
+        self.fnDict = {}
+
+    def __call__(self, fn):
+        self.fnDict[fn.__name__] = fn
+
+    def __getitem__(self, name):
+        return self.fnDict[name]
+
+
+def test_softmax_grad_ext_fusion(tag):
+    fns = FnDict()
+
+    @fns
+    def before(input0, input1, input2):
+        mul = Mul(input1, input0)
+        # the input axis will be converted to an attr in step ConstructKernelGraph
+        reduce_sum = ReduceSum(mul, axes)
+        sub = Sub(input0, reduce_sum)
+        mul1 = Mul(input2, input1)
+        mul_grad = Mul(mul1, sub)
+        return mul_grad
+
+    @fns
+    def after(input0, input1, input2):
+        res = SoftmaxGradExt(input0, input1, input2)
+        return MakeTuple(res)
+
+    return fns[tag]
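Beyond the graph-equality check in the unit test, the algebra itself can be sanity-checked numerically. Under the operand reading suggested earlier (input0 = dout, input1 = softmax(x), input2 = 1 — an assumption, not something the patch states), the matched expression should equal the derivative of sum(dout * softmax(x)). A NumPy finite-difference check:

```python
import numpy as np

def softmax(x, axes=(2, 3)):
    e = np.exp(x - x.max(axis=axes, keepdims=True))
    return e / e.sum(axis=axes, keepdims=True)

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3, 4, 4))
dout = rng.standard_normal(x.shape)
y = softmax(x)

# The subgraph replaced by SoftmaxGradExt, with input0=dout, input1=y, input2=1:
pattern_out = (1.0 * y) * (dout - np.sum(y * dout, axis=(2, 3), keepdims=True))

# Central finite difference of L(x) = sum(dout * softmax(x)) at one element.
idx, eps = (0, 1, 2, 3), 1e-5
xp, xm = x.copy(), x.copy()
xp[idx] += eps
xm[idx] -= eps
numeric = (np.sum(dout * softmax(xp)) - np.sum(dout * softmax(xm))) / (2 * eps)
assert np.isclose(numeric, pattern_out[idx], atol=1e-6)
print("pattern output matches numeric softmax gradient:", numeric, pattern_out[idx])
```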