implement AddN fission pass

7307c81f · YuJianfeng · e8f6c1a4 · 7307c81f · 7307c81f · 7307c81f
6 changed files
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -58,6 +58,7 @@
 #include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
 #include "pre_activate/ascend/format_type/insert_cast_for_runop.h"
 #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h"
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
 #include "utils/context/ms_context.h"
 #include "debug/anf_ir_dump.h"
 #include "debug/anf_ir_utils.h"
@@ -175,6 +176,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
    ir_fusion_pm->AddPass(std::make_shared<MulAddFusion>());
    ir_fusion_pm->AddPass(std::make_shared<MulAddNFusion>());
    ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>());
+    ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
    ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
    ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>());
  }

--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
+#include <memory>
+#include <vector>
+#include "session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace opt {
+namespace {
+// Creates an AddN node over `offset` consecutive inputs of `origin_addn_cnode`, starting at input index
+// `begin_index`. The new node takes its scope and abstract from the original node and gets attr kAttrN set to
+// `offset`.
+AnfNodePtr CreateNewAddn(const FuncGraphPtr &func_graph, const CNodePtr &origin_addn_cnode, size_t begin_index,
+                         size_t offset) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(origin_addn_cnode);
+  // Input 0 of the new node is a fresh AddN primitive; the selected inputs follow it.
+  std::vector<AnfNodePtr> sub_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))};
+  const size_t end_index = begin_index + offset;
+  size_t input_index = begin_index;
+  while (input_index < end_index) {
+    sub_inputs.push_back(origin_addn_cnode->input(input_index));
+    ++input_index;
+  }
+  CNodePtr sub_addn = func_graph->NewCNode(sub_inputs);
+  MS_EXCEPTION_IF_NULL(sub_addn);
+  sub_addn->set_scope(origin_addn_cnode->scope());
+  sub_addn->set_abstract(origin_addn_cnode->abstract());
+  AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(offset)), sub_addn);
+  return sub_addn;
+}
+}  // namespace
+
+// Pattern: the AddN primitive followed by a sequence variable standing for its inputs.
+const BaseRef AddnFission::DefinePattern() const {
+  VarPtr addn_inputs = std::make_shared<SeqVar>();
+  return VectorRef({prim::kPrimAddN, addn_inputs});
+}
+
+// Splits an AddN node with more than `inputs_divisor_` real inputs into a tree of AddN nodes that each take at most
+// `inputs_divisor_` real inputs.
+//
+// Returns nullptr when the node already has at most `inputs_divisor_` real inputs, or when `inputs_divisor_` is
+// below 2 (no grouping can then shrink the input count); otherwise returns the root AddN of the new tree.
+const AnfNodePtr AddnFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  // With a divisor of 0 or 1 regrouping never reduces the number of inputs, so the loops below would not terminate.
+  if (inputs_divisor_ < 2) {
+    return nullptr;
+  }
+  // The real input begins with index 1.
+  size_t origin_input_size = cnode->inputs().size() - 1;
+  if (origin_input_size <= inputs_divisor_) {
+    return nullptr;
+  }
+  CNodePtr new_cnode = cnode;
+  // Each pass builds one more level of the tree until the top-level AddN has at most inputs_divisor_ inputs.
+  while (origin_input_size > inputs_divisor_) {
+    std::vector<AnfNodePtr> base_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))};
+    size_t cur_input_index = 1;
+    // Group the inputs of the current AddN into chunks of inputs_divisor_ inputs each.
+    while (origin_input_size - cur_input_index + 1 > inputs_divisor_) {
+      base_addn_inputs.push_back(CreateNewAddn(func_graph, new_cnode, cur_input_index, inputs_divisor_));
+      cur_input_index += inputs_divisor_;
+    }
+    // The remaining inputs (between 1 and inputs_divisor_ of them) form the last chunk.
+    base_addn_inputs.push_back(
+      CreateNewAddn(func_graph, new_cnode, cur_input_index, origin_input_size - cur_input_index + 1));
+
+    CNodePtr base_addn = func_graph->NewCNode(base_addn_inputs);
+    MS_EXCEPTION_IF_NULL(base_addn);
+    base_addn->set_scope(new_cnode->scope());
+    base_addn->set_abstract(new_cnode->abstract());
+    AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(base_addn_inputs.size() - 1)), base_addn);
+    // The next pass regroups the inputs of the AddN that was just built.
+    new_cnode = base_addn;
+    origin_input_size = base_addn->inputs().size() - 1;
+  }
+
+  return new_cnode;
+}
+}  // namespace opt
+}  // namespace mindspore
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
+
+#include "pre_activate/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+// Default value of AddnFission::inputs_divisor_.
+constexpr size_t kAddnInputsDivisor = 63;
+
+// Pattern pass named "addn_fission". It declares the pattern to match and the processing applied to each matched
+// node; both are implemented in the accompanying source file.
+class AddnFission : public PatternProcessPass {
+ public:
+  explicit AddnFission(bool multigraph = true) : PatternProcessPass("addn_fission", multigraph) {}
+  ~AddnFission() override = default;
+
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  // Starts at kAddnInputsDivisor for every instance.
+  size_t inputs_divisor_{kAddnInputsDivisor};
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -142,6 +142,7 @@ constexpr auto kAttrDynInputSizes = "dyn_input_sizes";
 constexpr auto kAttrSrcFormat = "src_format";
 constexpr auto kAttrOutputUsedNum = "output_used_num";
 constexpr auto kAttrHasBias = "has_bias";
+constexpr auto kAttrN = "N";

 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";

--- a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/backend_common_test.h"
+#include "common/py_func_graph_fetcher.h"
+#define private public
+#define protected public
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
+#undef private
+#undef protected
+
+namespace mindspore {
+namespace opt {
+// Fixture for the AddnFission pass tests. Graphs are fetched from the Python module
+// gtest_input.pre_activate.addn_fission_test.
+class TestHWAddnFission : public BackendCommon {
+ public:
+  TestHWAddnFission() : get_py_fun_("gtest_input.pre_activate.addn_fission_test", true) {}
+  ~TestHWAddnFission() override = default;
+
+  // Runs AddnFission with `inputs_divisor` on the "before" graph and checks that the optimized graph equals the
+  // graph registered under `after_tag`.
+  // ASSERT_NE is used for the fetched graphs so that a null graph aborts this helper instead of being passed on
+  // to GetKernelGraph.
+  void CheckFission(size_t inputs_divisor, const char *after_tag) {
+    FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+    ASSERT_NE(g, nullptr);
+    std::vector<int> shp{2, 32, 224, 224};
+    auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+    // The "before" graph takes nine inputs, all with the same abstract.
+    AbstractBasePtrList args_spec_list;
+    for (size_t i = 0; i < 9; ++i) {
+      args_spec_list.push_back(x_abstract);
+    }
+    auto kg = GetKernelGraph(g, args_spec_list);
+
+    auto optimizer = std::make_shared<opt::GraphOptimizer>();
+    auto pm = std::make_shared<opt::PassManager>();
+    auto addn_fission = std::make_shared<opt::AddnFission>();
+    // inputs_divisor_ is private; it is reachable here because of the "#define private public" around the include.
+    addn_fission->inputs_divisor_ = inputs_divisor;
+    pm->AddPass(addn_fission);
+    optimizer->AddPassManager(pm);
+    FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+    FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", after_tag);
+    ASSERT_NE(g_after, nullptr);
+    auto kg_after = GetKernelGraph(g_after, args_spec_list);
+    EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+  }
+
+  UT::PyFuncGraphFetcher get_py_fun_;
+};
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_2) { CheckFission(2, "after_divided_by_2"); }
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_3) { CheckFission(3, "after_divided_by_3"); }
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_4) { CheckFission(4, "after_divided_by_4"); }
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_8) { CheckFission(8, "after_divided_by_8"); }
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_9) { CheckFission(9, "after_divided_by_9"); }
+}  // namespace opt
+}  // namespace mindspore
--- a/tests/ut/cpp/python_input/gtest_input/pre_activate/addn_fission_test.py
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/addn_fission_test.py
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from mindspore.ops import operations as P
+from mindspore.ops import Primitive
+
+# AddN operation instance used to build the graphs below.
+addn = P.AddN()
+# 'make_tuple' primitive; not referenced by the functions visible in this file.
+make_tuple = Primitive('make_tuple')
+
+
+class FnDict:
+    def __init__(self):
+        self.fnDict = {}
+
+    def __call__(self, fn):
+        self.fnDict[fn.__name__] = fn
+
+    def __getitem__(self, name):
+        return self.fnDict[name]
+
+
+def test_addn_fission(tag):
+    """Return the AddN fission graph function registered under `tag`.
+
+    `before` is the input graph (one AddN over nine inputs); each `after_divided_by_N`
+    is the graph expected for a divisor of N.
+    """
+    fns = FnDict()
+
+    # Input graph: a single AddN over all nine inputs.
+    @fns
+    def before(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))
+
+    # Divisor 2: inputs are paired level by level; the odd input is carried up through
+    # single-input AddN nodes (e, h, j).
+    @fns
+    def after_divided_by_2(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1))
+        b = addn((input2, input3))
+        c = addn((input4, input5))
+        d = addn((input6, input7))
+        e = addn((input8,))
+        f = addn((a, b))
+        g = addn((c, d))
+        h = addn((e,))
+        i = addn((f, g))
+        j = addn((h,))
+        return addn((i, j))
+
+    # Divisor 3: three groups of three inputs under one top-level AddN.
+    @fns
+    def after_divided_by_3(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2))
+        b = addn((input3, input4, input5))
+        c = addn((input6, input7, input8))
+        return addn((a, b, c))
+
+    # Divisor 4: two groups of four inputs plus a single-input AddN for the last input.
+    @fns
+    def after_divided_by_4(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2, input3))
+        b = addn((input4, input5, input6, input7))
+        c = addn((input8,))
+        return addn((a, b, c))
+
+    # Divisor 8: one group of eight inputs plus a single-input AddN for the last input.
+    @fns
+    def after_divided_by_8(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2, input3, input4, input5, input6, input7))
+        b = addn((input8,))
+        return addn((a, b))
+
+    # Divisor 9: identical to `before`.
+    @fns
+    def after_divided_by_9(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))
+
+    return fns[tag]