diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 432d88e7a4fefb0814b4d85b1224e4b776fb91d4..6ede069eb3ede36e976ac5989a115e113edac59c 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -58,6 +58,7 @@
 #include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
 #include "pre_activate/ascend/format_type/insert_cast_for_runop.h"
 #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h"
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
 #include "utils/context/ms_context.h"
 #include "debug/anf_ir_dump.h"
 #include "debug/anf_ir_utils.h"
@@ -175,6 +176,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGraph>
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
 }
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f6eb6aca64ece922137077d2646327a4da011e77
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.cc
@@ -0,0 +1,81 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
+#include <memory>
+#include <vector>
+#include "session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace opt {
+namespace {
+AnfNodePtr CreateNewAddn(const FuncGraphPtr &func_graph, const CNodePtr &origin_addn_cnode, size_t begin_index,
+                         size_t offset) {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(origin_addn_cnode);
+  std::vector<AnfNodePtr> new_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))};
+  for (size_t i = begin_index; i < begin_index + offset; ++i) {
+    new_addn_inputs.push_back(origin_addn_cnode->input(i));
+  }
+  CNodePtr new_addn = func_graph->NewCNode(new_addn_inputs);
+  MS_EXCEPTION_IF_NULL(new_addn);
+  new_addn->set_scope(origin_addn_cnode->scope());
+  new_addn->set_abstract(origin_addn_cnode->abstract());
+  AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(offset)), new_addn);
+  return new_addn;
+}
+}  // namespace
+
+const BaseRef AddnFission::DefinePattern() const {
+  VarPtr Xs = std::make_shared<SeqVar>();
+  return VectorRef({prim::kPrimAddN, Xs});
+}
+
+const AnfNodePtr AddnFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  // The real inputs begin at index 1; input 0 is the AddN primitive.
+  size_t origin_input_size = cnode->inputs().size() - 1;
+  if (origin_input_size <= inputs_divisor_) {
+    return nullptr;
+  }
+  CNodePtr new_cnode = cnode;
+  while (origin_input_size > inputs_divisor_) {
+    std::vector<AnfNodePtr> base_addn_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimAddN->name()))};
+    size_t cur_input_index = 1;
+    // Pack the inputs of the AddN into groups of at most inputs_divisor_ (63 by default).
+    while (origin_input_size - cur_input_index + 1 > inputs_divisor_) {
+      base_addn_inputs.push_back(CreateNewAddn(func_graph, new_cnode, cur_input_index, inputs_divisor_));
+      cur_input_index += inputs_divisor_;
+    }
+    base_addn_inputs.push_back(
+      CreateNewAddn(func_graph, new_cnode, cur_input_index, origin_input_size - cur_input_index + 1));
+
+    CNodePtr base_addn = func_graph->NewCNode(base_addn_inputs);
+    MS_EXCEPTION_IF_NULL(base_addn);
+    MS_EXCEPTION_IF_NULL(new_cnode);
+    base_addn->set_scope(new_cnode->scope());
+    base_addn->set_abstract(new_cnode->abstract());
+    AnfAlgo::SetNodeAttr(kAttrN, MakeValue(SizeToInt(base_addn_inputs.size() - 1)), base_addn);
+    new_cnode = base_addn;
+    origin_input_size = base_addn->inputs().size() - 1;
+  }
+
+  return new_cnode;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h
new file mode 100644
index 0000000000000000000000000000000000000000..3c62391f9a5bbe08a98b8c9ab980c29a561e14ac
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/addn_fission.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
+
+#include "pre_activate/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+constexpr size_t kAddnInputsDivisor = 63;
+class AddnFission : public PatternProcessPass {
+ public:
+  explicit AddnFission(bool multigraph = true)
+      : PatternProcessPass("addn_fission", multigraph), inputs_divisor_(kAddnInputsDivisor) {}
+  ~AddnFission() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  size_t inputs_divisor_;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADDN_FISSION_H_
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index ea5e969e524a623d4e308468f163f29f90185798..2b35168ec350a58d3b33e08c9406913ddacafc74 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -142,6 +142,7 @@ constexpr auto kAttrDynInputSizes = "dyn_input_sizes";
 constexpr auto kAttrSrcFormat = "src_format";
 constexpr auto kAttrOutputUsedNum = "output_used_num";
 constexpr auto kAttrHasBias = "has_bias";
+constexpr auto kAttrN = "N";
 
 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..90174636b1fe5c090610d36bf543bfec6dd716ba
--- /dev/null
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/addn_fission_test.cc
@@ -0,0 +1,160 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/backend_common_test.h"
+#include "common/py_func_graph_fetcher.h"
+#define private public
+#define protected public
+#include "pre_activate/ascend/ir_fission/addn_fission.h"
+#undef private
+#undef protected
+
+namespace mindspore {
+namespace opt {
+class TestHWAddnFission : public BackendCommon {
+ public:
+  TestHWAddnFission() : get_py_fun_("gtest_input.pre_activate.addn_fission_test", true) {}
+  ~TestHWAddnFission() override = default;
+
+  UT::PyFuncGraphFetcher get_py_fun_;
+};
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_2) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{2, 32, 224, 224};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 9; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto addn_fission = std::make_shared<opt::AddnFission>();
+  addn_fission->inputs_divisor_ = 2;
+  pm->AddPass(addn_fission);
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_2");
+  EXPECT_NE(g_after, nullptr);
+  auto kg_after = GetKernelGraph(g_after, args_spec_list);
+  EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+}
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_3) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{2, 32, 224, 224};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 9; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto addn_fission = std::make_shared<opt::AddnFission>();
+  addn_fission->inputs_divisor_ = 3;
+  pm->AddPass(addn_fission);
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_3");
+  EXPECT_NE(g_after, nullptr);
+  auto kg_after = GetKernelGraph(g_after, args_spec_list);
+  EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+}
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_4) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{2, 32, 224, 224};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 9; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto addn_fission = std::make_shared<opt::AddnFission>();
+  addn_fission->inputs_divisor_ = 4;
+  pm->AddPass(addn_fission);
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_4");
+  EXPECT_NE(g_after, nullptr);
+  auto kg_after = GetKernelGraph(g_after, args_spec_list);
+  EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+}
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_8) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{2, 32, 224, 224};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 9; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto addn_fission = std::make_shared<opt::AddnFission>();
+  addn_fission->inputs_divisor_ = 8;
+  pm->AddPass(addn_fission);
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_8");
+  EXPECT_NE(g_after, nullptr);
+  auto kg_after = GetKernelGraph(g_after, args_spec_list);
+  EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+}
+
+TEST_F(TestHWAddnFission, test_addn_fission_divided_by_9) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_addn_fission", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp{2, 32, 224, 224};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
+  AbstractBasePtrList args_spec_list;
+  for (size_t i = 0; i < 9; ++i) {
+    args_spec_list.push_back(x_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto addn_fission = std::make_shared<opt::AddnFission>();
+  addn_fission->inputs_divisor_ = 9;
+  pm->AddPass(addn_fission);
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_addn_fission", "after_divided_by_9");
+  EXPECT_NE(g_after, nullptr);
+  auto kg_after = GetKernelGraph(g_after, args_spec_list);
+  EXPECT_TRUE(CheckEqualGraph(kg_after, new_graph));
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/addn_fission_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/addn_fission_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c120ac3e68e7eed4aa185224315a1b72af3a6ef9
--- /dev/null
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/addn_fission_test.py
@@ -0,0 +1,80 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from mindspore.ops import operations as P
+from mindspore.ops import Primitive
+
+addn = P.AddN()
+make_tuple = Primitive('make_tuple')
+
+
+class FnDict:
+    def __init__(self):
+        self.fnDict = {}
+
+    def __call__(self, fn):
+        self.fnDict[fn.__name__] = fn
+
+    def __getitem__(self, name):
+        return self.fnDict[name]
+
+
+def test_addn_fission(tag):
+    """ test_addn_fission """
+    fns = FnDict()
+
+    @fns
+    def before(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))
+
+    @fns
+    def after_divided_by_2(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1))
+        b = addn((input2, input3))
+        c = addn((input4, input5))
+        d = addn((input6, input7))
+        e = addn((input8,))
+        f = addn((a, b))
+        g = addn((c, d))
+        h = addn((e,))
+        i = addn((f, g))
+        j = addn((h,))
+        return addn((i, j))
+
+    @fns
+    def after_divided_by_3(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2))
+        b = addn((input3, input4, input5))
+        c = addn((input6, input7, input8))
+        return addn((a, b, c))
+
+    @fns
+    def after_divided_by_4(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2, input3))
+        b = addn((input4, input5, input6, input7))
+        c = addn((input8,))
+        return addn((a, b, c))
+
+    @fns
+    def after_divided_by_8(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        a = addn((input0, input1, input2, input3, input4, input5, input6, input7))
+        b = addn((input8,))
+        return addn((a, b))
+
+    @fns
+    def after_divided_by_9(input0, input1, input2, input3, input4, input5, input6, input7, input8):
+        return addn((input0, input1, input2, input3, input4, input5, input6, input7, input8))
+
+    return fns[tag]
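For reference, a minimal standalone Python sketch of the grouping logic that AddnFission::Process applies (illustrative only, not part of the patch and not MindSpore code; the function name fission_rounds is invented for this sketch): in each round the current AddN inputs are packed into groups of at most inputs_divisor_, every group becomes one input of the next round's AddN, and the loop repeats until a single AddN with at most inputs_divisor_ inputs remains.

# Illustrative sketch of the fission loop; mirrors the structure of Process(),
# not the real pass. Returns the group sizes produced in each round.
def fission_rounds(num_inputs, divisor):
    rounds = []
    while num_inputs > divisor:
        groups = []
        index = 0
        # Full groups of `divisor` inputs, then one trailing group with whatever is left.
        while num_inputs - index > divisor:
            groups.append(divisor)
            index += divisor
        groups.append(num_inputs - index)
        rounds.append(groups)
        num_inputs = len(groups)  # the next round's AddN takes one input per group
    return rounds

# Matches the after_divided_by_2 graph above: 9 inputs with divisor 2 give
# rounds [2, 2, 2, 2, 1], [2, 2, 1], [2, 1], and the final AddN has 2 inputs.
print(fission_rounds(9, 2))

With this grouping, an AddN whose input count is already at or below the divisor is left untouched (Process returns nullptr), which is why the divided_by_9 case expects the original 9-input AddN unchanged; with the default divisor of 63, only AddN nodes with more than 63 inputs are split.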