diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc index 8f3e5a5737e151e4f7dc7aceba6113c3d664dd10..898b8abcb901c7dcf44e14753f28caab73327e28 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc @@ -63,10 +63,17 @@ #include "pre_activate/ascend/format_type/merge_cast_to_op.h" #include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" -#include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" -#include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" -#include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" +#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h" #include "pre_activate/ascend/enhancer/add_memcpy_async.h" #include "pre_activate/ascend/format_type/insert_cast_for_runop.h" @@ -281,7 +288,6 @@ void AscendBackendOptimization(const 
std::shared_ptr &kern AscendDataLayout(kernel_graph); // mixed precision optimization AscendMixPrecision(kernel_graph); - // buffer fusion // other optimization auto optimizer = std::make_shared(); auto other_pm = std::make_shared("other_pm"); @@ -290,7 +296,6 @@ void AscendBackendOptimization(const std::shared_ptr &kern other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); - other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); other_pm->AddPass(std::make_shared()); if (context_ptr->enable_task_sink() && context_ptr->loop_sink_flag() && ConfigManager::GetInstance().iter_num() > 1) { @@ -306,6 +311,8 @@ void AscendBackendOptimization(const std::shared_ptr &kern DumpIR(file_path, kernel_graph, true); DumpIRProto(kernel_graph, "after_hwopt"); } + // buffer fusion + AscendBackendUBFusionOptimization(kernel_graph); } void AscendBackendUBFusionOptimization(const std::shared_ptr &kernel_graph) { @@ -329,10 +336,17 @@ void AscendBackendUBFusionOptimization(const std::shared_ptrInit(); auto optimizer = std::make_shared(); auto ub_fusion_pm = std::make_shared("ub_fusion_pm"); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); - ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); - ub_fusion_pm->AddPass(std::make_shared()); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + ub_fusion_pm->AddPass(std::make_shared(fusion_id_allocator.get())); + 
ub_fusion_pm->AddPass(std::make_shared()); optimizer->AddPassManager(ub_fusion_pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..8c4b1dcc63f5f591d4fc81d3159327424630c9f3 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.cc @@ -0,0 +1,82 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void BnupdateEltwiseEltwiseFusionPass::MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto add = relu_input->cast(); + MS_EXCEPTION_IF_NULL(add); + auto tuple_getitem = add->input(1); + if (tuple_getitem->isa() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { + auto getitem = tuple_getitem->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); + } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, relu_input, bnupdate}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } + } +} + +void BnupdateEltwiseEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + 
if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { + auto eltwise_input = cnode->input(1); + if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTensorAdd)) { + MatchBnupdateAddRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); + } + } + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..59b7b25d8dedc348ca2736ccc03c1033b8733e89 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class BnupdateEltwiseEltwiseFusionPass : public FusionBasePass { + public: + explicit BnupdateEltwiseEltwiseFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("BnupdateEltwiseEltwiseFusionPass", idAllocator) {} + ~BnupdateEltwiseEltwiseFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, + const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_ELTWISE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc similarity index 75% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.cc rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc index ade115acde47d349e0bc410d2f3258fd2d3f26bb..348504345a3cb98b0d4c891cfaca661e2cfdd97a 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.cc 
@@ -1,96 +1,77 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h" -#include -#include -#include -#include -#include "kernel/kernel_fusion.h" -#include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" -#include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" - -namespace mindspore { -namespace opt { -void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, - const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion) { - MS_EXCEPTION_IF_NULL(cnode); - MS_EXCEPTION_IF_NULL(candidate_fusion); - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto getitem = relu_input->cast(); - auto bnupdate = getitem->input(1); - if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { - std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); - for (auto out_getitem : manager->node_users()[bnupdate]) { - auto out_getitem_ptr = out_getitem.first->cast(); - auto input2 = out_getitem_ptr->input(2); - auto output_idx = GetValue(GetValueNode(input2)); - output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); - } - AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); - 
std::unordered_set record{cnode, bnupdate}; - candidate_fusion->push_back(record); - SetRecordFusionId(record); - } -} - -void BnupdateEltwiseFusionPass::MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion) { - MS_EXCEPTION_IF_NULL(candidate_fusion); - std::vector node_list = TopoSort(kernel_graph.get_return()); - for (auto &node : node_list) { - if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || - AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { - continue; - } - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && - AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { - auto eltwise_input = cnode->input(1); - if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { - if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { - MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); - } - } - } - } -} - -bool BnupdateEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto return_node = kernel_graph.get_return(); - MS_EXCEPTION_IF_NULL(return_node); - if (return_node->inputs().size() <= 1) { - return false; - } - MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; - FusedNodeRecord candidate_fusion; - - MatchBnupdateOpNamePattern(kernel_graph, &candidate_fusion); - if (candidate_fusion.empty()) { - return false; - } - MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; - return true; -} -} // namespace opt -} // namespace mindspore +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void BnupdateEltwiseFusionPass::MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto getitem = relu_input->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); + } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, bnupdate}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void BnupdateEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + 
FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { + auto eltwise_input = cnode->input(1); + if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimTupleGetItem)) { + MatchBnupdateRelu(cnode, eltwise_input, kernel_graph, candidate_fusion); + } + } + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h similarity index 79% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h index ab112d004a73375f72788246625c58eb0e92781e..b9db5c68b7ef6747ccae153d59b7de015ad94e94 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/bnupdate_eltwise_fusion_pass.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h @@ -1,50 +1,48 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ - -#include -#include - -#include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" -#include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" - -namespace mindspore { -namespace opt { -using FusedNodeRecord = std::vector>; - -class BnupdateEltwiseFusionPass : public FusionBasePass { - public: - BnupdateEltwiseFusionPass() : FusionBasePass("BnupdateEltwiseFusionPass") {} - explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) - : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} - ~BnupdateEltwiseFusionPass() override = default; - bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; - - private: - void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion); - void MatchBnupdateOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class BnupdateEltwiseFusionPass : public FusionBasePass { + public: + explicit BnupdateEltwiseFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("BnupdateEltwiseFusionPass", idAllocator) {} + ~BnupdateEltwiseFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_BNUPDATE_ELTWISE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc new file mode 100644 index 
0000000000000000000000000000000000000000..2b243dbdac2fc5749aee64dd50a7e057603d70ac --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h" + +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void ConvBnReduceFusionPass::MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto conv = cnode->input(1); + if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { + std::vector output_used_num{SizeToInt(manager->node_users()[conv].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); + std::unordered_set record{cnode, conv}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void ConvBnReduceFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + 
FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { + MatchConvBnreduce(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..aa835b5ba73ad0b1d522757ee3921059e5198184 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_CONV_BNREDUCE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class ConvBnReduceFusionPass : public FusionBasePass { + public: + explicit ConvBnReduceFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("ConvBnReduceFusionPass", idAllocator) {} + ~ConvBnReduceFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_CONV_BNREDUCE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..4c6902816c1f55a48430aeca3a83bd880d2fd630 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +bool ConvDoubleInFusionPass::CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto user_nodes = manager->node_users()[node]; + return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE && + cnode->inputs().size() == ELTWISE_INPUT_SIZE; +} + +void ConvDoubleInFusionPass::MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + if (CheckDoubleInEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + } else { + return; + } + auto input_cnode = eltwise_input->cast(); + 
MS_EXCEPTION_IF_NULL(input_cnode); + auto double_in_eltwise_input = input_cnode->input(1); + if (!double_in_eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(double_in_eltwise_input) || + fusion_id_allocator->HasFusionIdAttr(double_in_eltwise_input)) { + return; + } + if (AnfAlgo::GetKernelType(double_in_eltwise_input) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(double_in_eltwise_input) == kernel::FusionType::CONVLUTION) { + (void)record.insert(double_in_eltwise_input); + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void ConvDoubleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchConvDoubleInEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..6bcc40789a2701bb0abb55f50ed565977509d05b --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class ConvDoubleInFusionPass : public FusionBasePass { + public: + explicit ConvDoubleInFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("ConvDoubleInFusionPass", idAllocator) {} + ~ConvDoubleInFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchConvDoubleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); + bool CheckDoubleInEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_DOUBLE_IN_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc new file mode 100644 index 
0000000000000000000000000000000000000000..c07c30f11c904415669503a4cbccdd21c4afbe67 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void ConvSingleInFusionPass::MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + if (record.size() == MAX_ELTWISE_NUM) { + break; + } + } + if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || + 
fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { + return; + } + if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::CONVLUTION) { + (void)record.insert(eltwise_input); + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void ConvSingleInFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchConvSingleInEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h similarity index 54% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h index 2e008633b4f7c08f922518dae322fc469dd508fd..2824b6c883f8bbc54bad6b659e90cb6bac4d699e 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h @@ -1,47 +1,48 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ -#include -#include -#include - -#include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" -#include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" - -namespace mindspore { -namespace opt { -using FusedNodeRecord = std::vector>; - -class FusionTypeFusionPass : public FusionBasePass { - public: - FusionTypeFusionPass() : FusionBasePass("FusionTypeFusionPass") {} - explicit FusionTypeFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("FusionTypeFusionPass", idAllocator) {} - ~FusionTypeFusionPass() override = default; - bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; - - private: - void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion); -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_TYPE_FUSION_PASS_H_ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class ConvSingleInFusionPass : public FusionBasePass { + public: + explicit ConvSingleInFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("ConvSingleInFusionPass", idAllocator) {} + ~ConvSingleInFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchConvSingleInEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_CONV_SINGLE_IN_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc similarity index 77% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.cc rename to 
mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc index 124b61bf1a85624029d6a1f5072e6fbcbcc5c57d..f485e901d8ad2035f4e62bb1e28b39cba93bcdab 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.cc @@ -1,107 +1,89 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h" - -#include -#include -#include -#include -#include "kernel/kernel_fusion.h" -#include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" -#include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" - -namespace mindspore { -namespace opt { -void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, - const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion, bool is_order) { - MS_EXCEPTION_IF_NULL(cnode); - MS_EXCEPTION_IF_NULL(candidate_fusion); - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - if (is_order) { - // DepthwiseConvolution--->Elemwise - auto depthwise_conv = cnode->input(1); - MS_EXCEPTION_IF_NULL(depthwise_conv); - if (cnode->isa() && IsPrimitiveCNode(depthwise_conv, prim::kPrimDepthwiseConv2dNative)) { - std::vector output_used_num{SizeToInt(manager->node_users()[depthwise_conv].size())}; - AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), depthwise_conv); - std::unordered_set record{cnode, depthwise_conv}; - candidate_fusion->push_back(record); - SetRecordFusionId(record); - } - } else { - // Elemwise-->DepthwiseConvolution - auto relu = cnode->input(1); - MS_EXCEPTION_IF_NULL(relu); - if (cnode->isa() && (IsPrimitiveCNode(relu, prim::kPrimRelu) || IsPrimitiveCNode(relu, prim::kPrimReluV2))) { - std::vector output_used_num{SizeToInt(manager->node_users()[relu].size())}; - AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu); - std::unordered_set record{cnode, relu}; - candidate_fusion->push_back(record); - SetRecordFusionId(record); - } - } -} - -void DepthwiseConvEltwiseFusionPass::MatchDepthwiseOpNamePattern(const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion) { - MS_EXCEPTION_IF_NULL(candidate_fusion); - std::vector node_list = 
TopoSort(kernel_graph.get_return()); - for (auto &node : node_list) { - if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || - AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { - continue; - } - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && - AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { - auto eltwise_input = cnode->input(1); - if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { - if (eltwise_input->isa() && - AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { - MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); - } - } - } else if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimDepthwiseConv2dNative->name()) { - MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, false); - } - } -} - -bool DepthwiseConvEltwiseFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto return_node = kernel_graph.get_return(); - MS_EXCEPTION_IF_NULL(return_node); - if (return_node->inputs().size() <= 1) { - return false; - } - MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; - FusedNodeRecord candidate_fusion; - MatchDepthwiseOpNamePattern(kernel_graph, &candidate_fusion); - if (candidate_fusion.empty()) { - return false; - } - MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; - return true; -} -} // namespace opt -} // namespace mindspore +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h" + +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void DepthwiseConvEltwiseFusionPass::MatchDepthwiseConvRelu(const CNodePtr &cnode, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion, bool is_order) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + if (is_order) { + // DepthwiseConvolution--->Elemwise + auto depthwise_conv = cnode->input(1); + MS_EXCEPTION_IF_NULL(depthwise_conv); + if (cnode->isa() && IsPrimitiveCNode(depthwise_conv, prim::kPrimDepthwiseConv2dNative)) { + std::vector output_used_num{SizeToInt(manager->node_users()[depthwise_conv].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), depthwise_conv); + std::unordered_set record{cnode, depthwise_conv}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } + } else { + // Elemwise-->DepthwiseConvolution + auto relu = cnode->input(1); + MS_EXCEPTION_IF_NULL(relu); + if (cnode->isa() && (IsPrimitiveCNode(relu, prim::kPrimRelu) || IsPrimitiveCNode(relu, prim::kPrimReluV2))) { + std::vector output_used_num{SizeToInt(manager->node_users()[relu].size())}; + 
AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu); + std::unordered_set record{cnode, relu}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } + } +} + +void DepthwiseConvEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { + auto eltwise_input = cnode->input(1); + if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimRelu)) { + if (eltwise_input->isa() && + AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimDepthwiseConv2dNative)) { + MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, true); + } + } + } else if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimDepthwiseConv2dNative->name()) { + MatchDepthwiseConvRelu(cnode, kernel_graph, candidate_fusion, false); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h similarity index 79% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h index 66ede52d046bf3ba52c99addec5e8c9fefe8ef70..05d473bd1af4e7d0bcbe5538ee2e82296031bd50 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/depthwiseconv_eltwise_fusion_pass.h 
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h @@ -1,50 +1,48 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ - -#include -#include - -#include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" -#include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" - -namespace mindspore { -namespace opt { -using FusedNodeRecord = std::vector>; - -class DepthwiseConvEltwiseFusionPass : public FusionBasePass { - public: - DepthwiseConvEltwiseFusionPass() : FusionBasePass("DepthwiseConvEltwiseFusionPass") {} - explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) - : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} - ~DepthwiseConvEltwiseFusionPass() override = default; - bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) override; - - private: - void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion, bool is_order); - void MatchDepthwiseOpNamePattern(const session::KernelGraph 
&kernel_graph, FusedNodeRecord *candidate_fusion); -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class DepthwiseConvEltwiseFusionPass : public FusionBasePass { + public: + explicit DepthwiseConvEltwiseFusionPass(FusionIdAllocator *idAllocator) + : FusionBasePass("DepthwiseConvEltwiseFusionPass", idAllocator) {} + ~DepthwiseConvEltwiseFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion, bool is_order); +}; +} // namespace opt +} // 
namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_DEPTHWISECONV_ELTWISE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..42860de700489df6f52c4926e893b3e118a0d9e0 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.cc @@ -0,0 +1,72 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void EltwiseFusionPass::MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + if (record.size() == MAX_ELTWISE_SIZE) { + break; + } + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + } + if (record.size() < MIN_ELTWISE_SIZE) { + return; + } + candidate_fusion->push_back(record); + SetRecordFusionId(record); +} + +void EltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..8cf9796e98f51670e3f917ade0f8e24c75c0c768 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ + +#include +#include + +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class EltwiseFusionPass : public FusionBasePass { + public: + explicit EltwiseFusionPass(FusionIdAllocator *idAllocator) : FusionBasePass("EltwiseFusionPass", idAllocator) {} + ~EltwiseFusionPass() override = default; + void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override; + + private: + void MatchEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, FusedNodeRecord 
*candidate_fusion); +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_ELTWISE_FUSION_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..51e39ac9fde45ea4b7115f2d363ee5cfa3ef5ef8 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.cc @@ -0,0 +1,71 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h" +#include +#include +#include "debug/anf_ir_dump.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +bool FusionBasePass::CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(manager); + if (!node->isa() || !AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node)) { + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto user_nodes = manager->node_users()[node]; + return AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && user_nodes.size() == ELTWISE_USE && + cnode->inputs().size() == ELTWISE_INPUT_SIZE; +} + +void FusionBasePass::SetRecordFusionId(const std::unordered_set &record) { + auto id = fusion_id_allocator->AllocateFusionId(); + for (auto node : record) { + fusion_id_allocator->SetFusionId(node, id); + } +} + +bool FusionBasePass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto return_node = kernel_graph.get_return(); + MS_EXCEPTION_IF_NULL(return_node); + if (return_node->inputs().size() <= 1) { + return false; + } + MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; + FusedNodeRecord candidate_fusion; + MatchSingleFusionPattern(kernel_graph, &candidate_fusion); + if (candidate_fusion.empty()) { + return false; + } + MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; + return true; +} + +bool FusionBasePass::Run(const FuncGraphPtr &graph) { + MS_EXCEPTION_IF_NULL(graph); + auto kernel_graph = graph->cast>(); + MS_EXCEPTION_IF_NULL(kernel_graph); + return MatchUBFusionPattern(*kernel_graph); +} +} // namespace opt +} // namespace mindspore diff --git 
a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h similarity index 75% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h index 4a6161ca089ff8e319d3dc7c9ad15c83dca1dfd0..c44508318e20b0505eddecb2818722e150134041 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/fusion_base_pass.h @@ -1,50 +1,57 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ -#include -#include -#include -#include - -#include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" - -namespace mindspore { -namespace opt { -using FusedNodeRecord = std::vector>; - -class FusionBasePass : public Pass { - public: - explicit FusionBasePass(const std::string &name) : Pass(name) {} - FusionBasePass(const std::string &name, FusionIdAllocator *idAllocator) - : Pass(name), fusion_id_allocator(idAllocator) {} - ~FusionBasePass() override = default; - bool Run(const FuncGraphPtr &graph) override; - - protected: - virtual bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph) = 0; - void SetRecordFusionId(const std::unordered_set &record); - FusionIdAllocator *fusion_id_allocator; -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ +#include +#include +#include +#include + +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" + +namespace mindspore { +namespace opt { +const int8_t MAX_ELTWISE_NUM = 3; +const int8_t MIN_ELTWISE_SIZE = 2; +const int8_t ELTWISE_INPUT_SIZE = 2; +const int8_t ELTWISE_USE = 1; +const int8_t MAX_ELTWISE_SIZE = 6; +using FusedNodeRecord = std::vector>; + +class FusionBasePass : public Pass { + public: + FusionBasePass(const std::string &name, FusionIdAllocator *idAllocator) + : Pass(name), fusion_id_allocator(idAllocator) {} + ~FusionBasePass() override = default; + bool Run(const FuncGraphPtr &graph) override; + bool MatchUBFusionPattern(const session::KernelGraph &kernel_graph); + + protected: + virtual void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) = 0; + void SetRecordFusionId(const std::unordered_set &record); + bool CheckEltWiseNode(FuncGraphManager *manager, const AnfNodePtr &node); + FusionIdAllocator *fusion_id_allocator; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_FUSION_BASE_PASS_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..41b17eba045264f973dbe66ff9f1eb9ec2bb1daa --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.cc @@ -0,0 +1,65 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void MatmulEltwiseFusionPass::MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, + const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::vector output_used_num{SizeToInt(manager->node_users()[relu_input].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), relu_input); + std::unordered_set record{cnode, relu_input}; + candidate_fusion->push_back(record); + SetRecordFusionId(record); +} + +void MatmulEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + 
MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE) { + auto eltwise_input = cnode->input(1); + if (eltwise_input->isa() && AnfAlgo::CheckPrimitiveType(eltwise_input, prim::kPrimMatMul)) { + MatchMatmulEltwise(cnode, eltwise_input, kernel_graph, candidate_fusion); + } + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..00178ee678d1dd9e2255f57242bd77dd64c996bb --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_
+
+#include
+#include
+
+#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
+#include "ir/anf.h"
+#include "pre_activate/common/pass.h"
+#include "pre_activate/common/fusion_id_allocator.h"
+#include "device/kernel_info.h"
+#include "kernel/kernel.h"
+#include "session/kernel_graph.h"
+
+namespace mindspore {
+namespace opt {
+// Same alias that fusion_base_pass.h (included above) already declares.
+// NOTE(review): the template arguments of this alias (and the <...> include names above) appear to
+// have been stripped from this text — restore them from the repository before compiling.
+using FusedNodeRecord = std::vector>;
+
+// Buffer-fusion pass that pairs a MatMul node with the TBE element-wise kernel consuming it.
+class MatmulEltwiseFusionPass : public FusionBasePass {
+ public:
+  // Registers the pass under the name "MatmulEltwiseFusionPass" and forwards `idAllocator` to the
+  // base class.
+  explicit MatmulEltwiseFusionPass(FusionIdAllocator *idAllocator)
+      : FusionBasePass("MatmulEltwiseFusionPass", idAllocator) {}
+  ~MatmulEltwiseFusionPass() override = default;
+  // Scans `kernel_graph` and appends every matched {eltwise, matmul} node set to `candidate_fusion`.
+  void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override;
+
+ private:
+  // Records `cnode` together with its input `relu_input` as one fusion candidate.
+  void MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
+                          FusedNodeRecord *candidate_fusion);
+};
+}  // namespace opt
+}  // namespace mindspore
+
+#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_PASS_MATMUL_ELTWISE_FUSION_PASS_H_
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.cc
deleted file mode 100644
index e993bdeb78453f8a25261383511e236bc2f68845..0000000000000000000000000000000000000000
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "pre_activate/ascend/buffer_fusion/pass/fusion_base_pass.h" -#include -#include -#include "debug/anf_ir_dump.h" -#include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" - -namespace mindspore { -namespace opt { -void FusionBasePass::SetRecordFusionId(const std::unordered_set &record) { - auto id = fusion_id_allocator->AllocateFusionId(); - for (auto node : record) { - fusion_id_allocator->SetFusionId(node, id); - } -} -bool FusionBasePass::Run(const FuncGraphPtr &graph) { - MS_EXCEPTION_IF_NULL(graph); - auto kernel_graph = graph->cast>(); - MS_EXCEPTION_IF_NULL(kernel_graph); - return MatchUBFusionPattern(*kernel_graph); -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.cc deleted file mode 100644 index 4ea98e580bca491d86363d7f2bc564f8e65fba19..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.cc +++ /dev/null @@ -1,245 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "pre_activate/ascend/buffer_fusion/pass/fusion_type_fusion_pass.h" - -#include -#include -#include -#include -#include -#include - -#include "kernel/kernel_fusion.h" -#include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "utils/context/ms_context.h" -#include "pre_activate/common/fusion_id_allocator.h" - -namespace mindspore { -namespace opt { -namespace { -const int8_t MAX_PATTERN_SIZE = 7; -const int8_t MIN_PATTERN_SIZE = 2; -const int8_t ELTWISE_INPUT_SIZE = 2; -const int8_t ELTWISE_USE = 1; -const int8_t MULTI_ELTWISE_USE = 2; -const int8_t MAX_MULTI_ELTWISE_SIZE = 4; -const int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3; -constexpr auto kOpAttrFusionId = "fusion_id"; - -bool CheckEltWiseNode(FuncGraphManager *manager, std::unordered_set *record, const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(manager); - MS_EXCEPTION_IF_NULL(record); - auto user_nodes = manager->node_users()[node]; - return (AnfAlgo::GetKernelType(node) == KernelType::TBE_KERNEL && - AnfAlgo::GetFusionType(node) == kernel::FusionType::ELEMWISE && - (user_nodes.size() <= ELTWISE_USE || record->size() == 0)); -} - -// Common method to check for predecessors and successors in a fusion pattern -std::tuple FindPredAndSuccEltWiseNodes(const int8_t &max_size, FuncGraphManager *manager, - std::unordered_set *visited_set, - std::deque *todo, - std::unordered_set *record, const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(manager); - MS_EXCEPTION_IF_NULL(visited_set); - MS_EXCEPTION_IF_NULL(todo); - MS_EXCEPTION_IF_NULL(record); - 
MS_EXCEPTION_IF_NULL(node); - - CNodePtr new_node = node; - if (new_node->inputs().size() < ELTWISE_INPUT_SIZE) { - return std::make_tuple(false, new_node); - } - int8_t index = 1; - auto &users = manager->node_users(); - while (CheckEltWiseNode(manager, record, new_node)) { - (void)record->insert(new_node); - (void)visited_set->insert(new_node); - (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); - - auto cnode = new_node->input(1); - MS_EXCEPTION_IF_NULL(cnode); - if (!cnode->isa()) { - return std::make_tuple(false, new_node); - } - new_node = cnode->cast(); - MS_EXCEPTION_IF_NULL(new_node); - - if (!AnfAlgo::IsRealKernel(new_node) || new_node->inputs().size() < ELTWISE_INPUT_SIZE || - users[(new_node)].size() >= MULTI_ELTWISE_USE || visited_set->find(new_node) != visited_set->end()) { - return std::make_tuple(false, new_node); - } - - if (index >= max_size) { - break; - } - index++; - } - return std::make_tuple(true, new_node); -} - -std::tuple MatchGeneralPattern(FuncGraphManager *manager, std::unordered_set *record, - std::unordered_set *visited_set, - std::deque *todo, const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(manager); - MS_EXCEPTION_IF_NULL(record); - MS_EXCEPTION_IF_NULL(visited_set); - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(todo); - CNodePtr new_node = node; - auto &users = manager->node_users(); - if (users[(new_node)].size() >= MULTI_ELTWISE_USE) { - return std::make_tuple(false, new_node); - } - - (void)record->insert(node); - (void)visited_set->insert(node); - (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); - - if (node->inputs().size() < 2) { - return std::make_tuple(false, new_node); - } - // only check the first real input, will check all - auto cnode = node->input(1); - MS_EXCEPTION_IF_NULL(cnode); - if (!cnode->isa()) { - return std::make_tuple(false, new_node); - } - new_node = cnode->cast(); - MS_EXCEPTION_IF_NULL(new_node); - - if 
(!AnfAlgo::IsRealKernel(new_node) || users[(new_node)].size() >= MULTI_ELTWISE_USE || - visited_set->find(new_node) != visited_set->end()) { - return std::make_tuple(false, new_node); - } - return std::make_tuple(true, new_node); -} - -CNodePtr FindFusionAnfNode(FuncGraphManager *manager, std::unordered_set *visited_set, - std::unordered_set *record, std::deque *todo, const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(manager); - MS_EXCEPTION_IF_NULL(visited_set); - MS_EXCEPTION_IF_NULL(record); - MS_EXCEPTION_IF_NULL(node); - MS_EXCEPTION_IF_NULL(todo); - // find fusion pattern predecessor nodes - auto ret = FindPredAndSuccEltWiseNodes(MAX_MULTI_ELTWISE_SIZE, manager, visited_set, todo, record, node); - auto new_node = std::get<1>(ret); - auto node_use_size = manager->node_users()[new_node].size(); - if (!std::get<0>(ret) || (record->size() > 1 && node_use_size > 1) || record->size() >= MAX_MULTI_ELTWISE_SIZE || - AnfAlgo::GetKernelType(new_node) != KernelType::TBE_KERNEL) { - return new_node; - } - - // key of fusion precessor - auto node_fusion_type = AnfAlgo::GetFusionType(new_node); - switch (node_fusion_type) { - case kernel::FusionType::COMMREDUCE: - case kernel::FusionType::SEGMENT: - ret = MatchGeneralPattern(manager, record, visited_set, todo, new_node); - new_node = std::get<1>(ret); - if (!std::get<0>(ret)) { - return new_node; - } - break; - case kernel::FusionType::ELEMWISE: - return new_node; - // -fallthrough to default and return - case kernel::FusionType::CONVLUTION: - (void)record->insert(new_node); - default: - (void)visited_set->insert(new_node); - if (new_node != nullptr) { - (void)todo->insert(todo->end(), new_node->inputs().begin() + 1, new_node->inputs().end()); - } - return new_node; - } - // find fusion pattern successor nodes - ret = FindPredAndSuccEltWiseNodes(MAX_PURE_BUFFER_SUCC_SIZE, manager, visited_set, todo, record, new_node); - return std::get<1>(ret); -} -} // namespace - -void FusionTypeFusionPass::MatchFusionTypePattern(const 
session::KernelGraph &kernel_graph, - FusedNodeRecord *candidate_fusion) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - MS_EXCEPTION_IF_NULL(candidate_fusion); - - auto return_node = kernel_graph.get_return(); - MS_EXCEPTION_IF_NULL(return_node); - if (return_node->inputs().size() <= 1) { - return; - } - std::deque todo; - todo.push_back(return_node->input(1)); - std::unordered_set visited_set; - - while (!todo.empty()) { - auto node = todo.front(); - MS_EXCEPTION_IF_NULL(node); - todo.pop_front(); - std::unordered_set record; - if (visited_set.find(node) != visited_set.end() || fusion_id_allocator->HasFusionIdAttr(node)) { - continue; - } - // Only fuse real cnode - if (!AnfAlgo::IsRealCNodeKernel(node)) { - auto cnode = node->cast(); - if (cnode != nullptr) { - (void)todo.insert(todo.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); - } - continue; - } - auto cnode = node->cast(); - MS_EXCEPTION_IF_NULL(cnode); - // cnode maybe updated - cnode = FindFusionAnfNode(manager.get(), &visited_set, &record, &todo, cnode); - if (record.size() >= MIN_PATTERN_SIZE && record.size() <= MAX_PATTERN_SIZE) { - candidate_fusion->push_back(record); - SetRecordFusionId(record); - } - if (record.find(cnode) == record.end()) { - todo.push_back(cnode); - } - // no node matched - if (record.size() == 0) { - (void)visited_set.insert(node); - } - (void)todo.insert(todo.end(), cnode->inputs().begin() + 1, cnode->inputs().end()); - } -} - -bool FusionTypeFusionPass::MatchUBFusionPattern(const session::KernelGraph &kernel_graph) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto return_node = kernel_graph.get_return(); - MS_EXCEPTION_IF_NULL(return_node); - if (return_node->inputs().size() <= 1) { - return false; - } - MS_LOG(DEBUG) << "MatchBufferFusionPattern start..."; - FusedNodeRecord candidate_fusion; - MatchFusionTypePattern(kernel_graph, &candidate_fusion); - if (candidate_fusion.empty()) { - return false; - } 
- MS_LOG(DEBUG) << "MatchBufferFusionPattern Success..."; - return true; -} -} // namespace opt -} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..14f26b85acf7026707c073d1a3011bf167ca74a5 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.cc @@ -0,0 +1,88 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + if (record.size() == MAX_ELTWISE_NUM) { + break; + } + } + if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || + fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { + return; + } + if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::COMMREDUCE) { + (void)record.insert(eltwise_input); + auto previous_eltwise_input = cnode->input(1); + auto previous_size = record.size(); + while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { + (void)record.insert(previous_eltwise_input); + auto previous_node = previous_eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(previous_node); + previous_eltwise_input = previous_node->input(1); + if (record.size() - previous_size == MAX_ELTWISE_NUM) { + break; + } + } + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord 
*candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchReduceEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..082cbf99a0d150a451ba147f596d0687a37870d6 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_
+
+#include
+#include
+
+#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
+#include "ir/anf.h"
+#include "pre_activate/common/pass.h"
+#include "pre_activate/common/fusion_id_allocator.h"
+#include "device/kernel_info.h"
+#include "kernel/kernel.h"
+#include "session/kernel_graph.h"
+
+namespace mindspore {
+namespace opt {
+// Same alias that fusion_base_pass.h (included above) already declares.
+// NOTE(review): the template arguments of this alias (and the <...> include names above) appear to
+// have been stripped from this text — restore them from the repository before compiling.
+using FusedNodeRecord = std::vector>;
+
+// Buffer-fusion pass that groups a TBE COMMREDUCE kernel with the element-wise kernels chained
+// to it.
+class ReduceEltwiseFusionPass : public FusionBasePass {
+ public:
+  // Registers the pass under the name "ReduceEltwiseFusionPass" and forwards `idAllocator` to the
+  // base class.
+  explicit ReduceEltwiseFusionPass(FusionIdAllocator *idAllocator)
+      : FusionBasePass("ReduceEltwiseFusionPass", idAllocator) {}
+  ~ReduceEltwiseFusionPass() override = default;
+  // Scans `kernel_graph` and appends every matched node set to `candidate_fusion`.
+  void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override;
+
+ private:
+  // Tries to build one fusion candidate that ends at the element-wise node `cnode`.
+  void MatchReduceEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
+                          FusedNodeRecord *candidate_fusion);
+};
+}  // namespace opt
+}  // namespace mindspore
+
+#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_REDUCE_ELTWISE_FUSION_PASS_H_
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..329f5eb1a4b9e7f28b69cd744afc0553b3b37ca2
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.cc
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h" +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "utils/context/ms_context.h" +#include "pre_activate/common/fusion_id_allocator.h" + +namespace mindspore { +namespace opt { +void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + std::unordered_set record{cnode}; + auto eltwise_input = cnode->input(1); + while (CheckEltWiseNode(manager.get(), eltwise_input)) { + (void)record.insert(eltwise_input); + auto input_cnode = eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(input_cnode); + eltwise_input = input_cnode->input(1); + if (record.size() == MAX_ELTWISE_NUM) { + break; + } + } + if (!eltwise_input->isa() || !AnfAlgo::IsRealCNodeKernel(eltwise_input) || + fusion_id_allocator->HasFusionIdAttr(eltwise_input)) { + return; + } + if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::SEGMENT) { + (void)record.insert(eltwise_input); + auto previous_eltwise_input = cnode->input(1); + auto previous_size = record.size(); + while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { + 
(void)record.insert(previous_eltwise_input); + auto previous_node = previous_eltwise_input->cast(); + MS_EXCEPTION_IF_NULL(previous_node); + previous_eltwise_input = previous_node->input(1); + if (record.size() - previous_size == MAX_ELTWISE_NUM) { + break; + } + } + candidate_fusion->push_back(record); + SetRecordFusionId(record); + } +} + +void SegmentEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, + FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(candidate_fusion); + std::vector node_list = TopoSort(kernel_graph.get_return()); + for (auto &node : node_list) { + if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || + AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { + continue; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL && + AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) { + MatchSegmentEltwise(cnode, kernel_graph, candidate_fusion); + } + } +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..c774d2a8bf0e5606a8b81808680673cf69169de1 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h @@ -0,0 +1,48 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_
+
+#include
+#include
+
+#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
+#include "ir/anf.h"
+#include "pre_activate/common/pass.h"
+#include "pre_activate/common/fusion_id_allocator.h"
+#include "device/kernel_info.h"
+#include "kernel/kernel.h"
+#include "session/kernel_graph.h"
+
+namespace mindspore {
+namespace opt {
+// Same alias that fusion_base_pass.h (included above) already declares.
+// NOTE(review): the template arguments of this alias (and the <...> include names above) appear to
+// have been stripped from this text — restore them from the repository before compiling.
+using FusedNodeRecord = std::vector>;
+
+// Buffer-fusion pass that groups a TBE SEGMENT kernel with the element-wise kernels chained to it.
+class SegmentEltwiseFusionPass : public FusionBasePass {
+ public:
+  // Registers the pass under the name "SegmentEltwiseFusionPass" and forwards `idAllocator` to the
+  // base class.
+  explicit SegmentEltwiseFusionPass(FusionIdAllocator *idAllocator)
+      : FusionBasePass("SegmentEltwiseFusionPass", idAllocator) {}
+  ~SegmentEltwiseFusionPass() override = default;
+  // Scans `kernel_graph` and appends every matched node set to `candidate_fusion`.
+  void MatchSingleFusionPattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion) override;
+
+ private:
+  // Tries to build one fusion candidate that ends at the element-wise node `cnode`.
+  void MatchSegmentEltwise(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
+                           FusedNodeRecord *candidate_fusion);
+};
+}  // namespace opt
+}  // namespace mindspore
+
+#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_SEGMENT_ELTWISE_FUSION_PASS_H_
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc
similarity index 96%
rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.cc
rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc
index
7eb9ac7bdf00a8f03e4433d39a9a4cfbf10faeb9..af20c47996d5b52015bced625388e2596449f12f 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.cc @@ -1,435 +1,435 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "kernel/kernel_fusion.h" -#include "debug/anf_ir_dump.h" -#include "session/anf_runtime_algorithm.h" -#include "operator/ops.h" -#include "device/kernel_info.h" -#include "utils/context/ms_context.h" - -namespace mindspore { -namespace opt { -namespace { -const int8_t MAX_PATTERN_SIZE = 7; -const int8_t MIN_PATTERN_SIZE = 2; -const int8_t ELTWISE_INPUT_SIZE = 2; -const int8_t ELTWISE_USE = 1; -const int8_t MULTI_ELTWISE_USE = 2; -const int8_t MAX_MULTI_ELTWISE_SIZE = 4; -const int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3; -constexpr auto kOpAttrFusionId = "fusion_id"; - -#ifdef DEBUG -std::string GetFusionTypeName(const kernel::FusionType &type) { - switch (type) { - case kernel::FusionType::COMMREDUCE: - return "COMMREDUCE"; - case kernel::FusionType::SEGMENT: - return "SEGMENT"; - case kernel::FusionType::ELEMWISE: - return "ELEMWISE"; - case kernel::FusionType::CONVLUTION: - return "CONVLUTION"; - case kernel::FusionType::OPAQUE: - 
return "OPAQUE"; - default: - return "OPAQUE"; - } -} - -void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { - MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; - for (auto &node : info.input_nodes) { - MS_LOG(INFO) << "=== Input: " << node->DebugString(); - } - for (auto &node : info.output_nodes) { - MS_LOG(INFO) << "=== Output: " << node->DebugString(); - } - for (auto &node : info.compute_nodes) { - MS_LOG(INFO) << "=== Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) - << ")"; - } - MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; -} -#endif -CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::vector &outputs_list, - const std::vector &anf_nodes, session::KernelGraph *kernel_graph) { - MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; - MS_EXCEPTION_IF_NULL(kernel_graph); - std::string fusion_op_name = "FusionOp"; - for (auto node : anf_nodes) { - fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); - } - auto fusion_op = std::make_shared(fusion_op_name); - MS_EXCEPTION_IF_NULL(fusion_op); - - std::vector input_names; - for (uint8_t i = 0; i < inputs_list.size(); i++) { - input_names.emplace_back("input" + std::to_string(i)); - } - std::vector output_names; - for (uint8_t i = 0; i < outputs_list.size(); i++) { - output_names.emplace_back("output" + std::to_string(i)); - } - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - fusion_op->set_attr("input_names", input_names_v); - fusion_op->set_attr("output_names", output_names_v); - std::vector fusion_inputs_list = inputs_list; - auto value_node = std::make_shared(fusion_op); - (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); - auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); - if (buffer_fusion_kernel == nullptr) { - MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; - } - 
buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); - - return buffer_fusion_kernel; -} - -kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list, - const std::vector &outputs_list) { - MS_LOG(DEBUG) << "Start Create Kernel Info"; - kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; - // inputs format and data type - std::vector inputs_format; - std::vector inputs_data_type; - for (const auto &input : inputs_list) { - auto real_input = AnfAlgo::VisitKernel(input, 0); - inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); - inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); - } - // outputs format and data type - std::vector outputs_format; - std::vector outputs_data_type; - for (const auto &output : outputs_list) { - if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { - auto tuple_getitem = output->cast(); - MS_EXCEPTION_IF_NULL(tuple_getitem); - outputs_format.push_back(AnfAlgo::GetOutputFormat( - tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); - outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( - tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); - } else { - outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); - outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); - } - } - builder.SetInputsFormat(inputs_format); - builder.SetInputsDeviceType(inputs_data_type); - builder.SetOutputsFormat(outputs_format); - builder.SetOutputsDeviceType(outputs_data_type); - builder.SetKernelType(KernelType::TBE_KERNEL); - return builder.Build(); -} - -AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, - size_t output_index) { - MS_EXCEPTION_IF_NULL(kernel_graph); - std::vector tuple_getitem_inputs_list; - auto value = std::make_shared(prim::kPrimTupleGetItem); - 
MS_EXCEPTION_IF_NULL(value); - auto idx = NewValueNode(SizeToInt(output_index)); - MS_EXCEPTION_IF_NULL(idx); - int temp = SizeToInt(output_index); - auto imm = std::make_shared(temp); - auto abstract_scalar = std::make_shared(imm); - idx->set_abstract(abstract_scalar); - tuple_getitem_inputs_list.push_back(value); - tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); - tuple_getitem_inputs_list.push_back(idx); - auto tuple_item = kernel_graph->NewCNode(tuple_getitem_inputs_list); - MS_EXCEPTION_IF_NULL(tuple_item); - AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, - {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, - tuple_item.get()); - return tuple_item; -} - -void ReplaceInputNodeInOtherFusionScope(std::unordered_map *buffer_fusion_infos, - int32_t fusion_id, const AnfNodePtr &output_item, - const AnfNodePtr &replace_item) { - for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { - auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), - output_item); - if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { - MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; - *itr = replace_item; - } - } -} - -void ReplaceOldNode(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, - const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { - MS_EXCEPTION_IF_NULL(kernel_graph); - auto manager = kernel_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; - if (buffer_fusion_info.outputs_list.size() == 1) { // single output - (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); - ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], - buffer_fusion_kernel); - } else { // multiple output - for (size_t index = 0; index < 
buffer_fusion_info.outputs_list.size(); ++index) { - auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); - (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); - ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], - tuple_item); - } - } -} - -void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - auto nodes = TopoSort(kernel_graph->get_return()); - for (auto &node : nodes) { - MS_EXCEPTION_IF_NULL(node); - if (!node->isa()) { - continue; - } - auto cnode = node->cast(); - if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { - auto fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); - (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); - } - } -} - -void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - - for (auto &buffer_fusion_info : *buffer_fusion_infos) { - auto fusion_id = buffer_fusion_info.first; - auto fusion_info = buffer_fusion_info.second; - for (const auto &node : fusion_info.anf_nodes) { - auto cnode = node->cast(); - for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { - auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); - if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == - fusion_info.anf_nodes.end()) { - if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), - (*buffer_fusion_infos)[fusion_id].inputs_list.end(), - cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { - (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); - } - } - } - } - } -} - -bool TupleGetitemNodeCompare(const 
AnfNodePtr &node1, const AnfNodePtr &node2) { - MS_EXCEPTION_IF_NULL(node1); - MS_EXCEPTION_IF_NULL(node2); - auto getitem1 = node1->cast(); - auto getitem2 = node2->cast(); - MS_EXCEPTION_IF_NULL(getitem1); - MS_EXCEPTION_IF_NULL(getitem2); - auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); - auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); - return output_idx1 < output_idx2; -} - -void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(kernel_graph); - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - auto manager = kernel_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - - for (auto &buffer_fusion_info : *buffer_fusion_infos) { - auto fusion_id = buffer_fusion_info.first; - auto fusion_info = buffer_fusion_info.second; - for (const auto &node : fusion_info.anf_nodes) { - if (AnfAlgo::GetOutputTensorNum(node) == 1) { - for (auto use_node : manager->node_users()[node]) { - if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == - fusion_info.anf_nodes.end()) { - (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); - break; - } - } - } else { - int prev_idx = 0; - std::vector tuple_getitem_nodes; - std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), - std::back_inserter(tuple_getitem_nodes), - [](const std::pair &use_node) { return use_node.first; }); - std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); - for (auto getitem : tuple_getitem_nodes) { - auto getitem_ptr = getitem->cast(); - auto input2 = getitem_ptr->input(2); - auto output_idx = GetValue(GetValueNode(input2)); - for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { - auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); - (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); - } - prev_idx = output_idx + 1; - for (auto 
item_use_node : manager->node_users()[getitem]) { - if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == - fusion_info.anf_nodes.end()) { - (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); - break; - } - } - } - } - } - } -} - -void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector &outputs_list, - const AnfNodePtr &fusion_kernel) { - MS_EXCEPTION_IF_NULL(kernel_graph); - auto manager = kernel_graph->manager(); - MS_EXCEPTION_IF_NULL(manager); - for (size_t idx = 0; idx < outputs_list.size(); ++idx) { - auto output = outputs_list[idx]; - if (output->isa() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { - auto real_output = AnfAlgo::VisitKernel(output, 0); - auto output_cnode = output->cast(); - MS_EXCEPTION_IF_NULL(output_cnode); - auto input2 = output_cnode->input(2); - auto output_idx = GetValue(GetValueNode(input2)); - session::AnfWithOutIndex out_pair(real_output.first, output_idx); - if (kernel_graph->IsInRefOutputMap(out_pair)) { - auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); - session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); - kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); - } - } else { - session::AnfWithOutIndex out_pair(output, 0); - if (kernel_graph->IsInRefOutputMap(out_pair)) { - auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); - session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); - kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); - } - } - } -} -} // namespace - -void TbeBufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, - std::unordered_map *buffer_fusion_infos) const { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); - GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); - GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); - 
for (auto &buffer_fusion_info : *buffer_fusion_infos) { - buffer_fusion_info.second.kernel_build_info = - CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); - } -} - -bool TbeBufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { - MS_EXCEPTION_IF_NULL(kernel_graph); - bool change = false; - std::unordered_map buffer_fusion_infos; - buffer_fusion_infos.clear(); - GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); - - std::vector fusion_scope_infos; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - mindspore::kernel::FusionScopeInfo fusion_scope_info; - fusion_scope_info.scope_id = buffer_fusion_info.first; - fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; - fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; - fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; - fusion_scope_infos.push_back(fusion_scope_info); -#ifdef DEBUG - DumpFusionScopeInfo(fusion_scope_info); -#endif - } - auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); - std::vector fusion_ids; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() - << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() - << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); - fusion_ids.push_back(buffer_fusion_info.first); - } - // Replace fusion op from return to head - std::sort(fusion_ids.begin(), fusion_ids.end()); - for (auto &fusion_id : fusion_ids) { - // Get kernel mod when supporting tbe - if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { - MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; - continue; - } - change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); - } - MS_LOG(DEBUG) << "End Buffer 
Fusion"; - return change; -} - -bool TbeBufferFusion::ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, - int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, - session::KernelGraph *kernel_graph) const { - auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; - auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, - buffer_fusion_info.anf_nodes, kernel_graph); - AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); - // Set abstract of fusion_op node - std::vector types; - std::vector> shapes; - for (const auto &out_node : buffer_fusion_info.outputs_list) { - for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { - types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); - shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); - } - } - if (types.empty() || shapes.empty()) { - MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; - return false; - } - AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); - AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); - SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); - ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); - return true; -} - -bool TbeBufferFusion::Run(const FuncGraphPtr &graph) { - bool changed = false; - MS_EXCEPTION_IF_NULL(graph); - auto kernel_graph = graph->cast>(); - MS_EXCEPTION_IF_NULL(kernel_graph); - changed = FuseBufferFusionPattern(kernel_graph.get()); - // clear fusion_id attr - for (auto &node : graph->nodes()) { - if (node != nullptr && node->isa()) { - AnfAlgo::EraseNodeAttr(kAttrFusionId, node); - } - } - return changed; -} -} // namespace opt -} // namespace mindspore +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kernel/kernel_fusion.h" +#include "debug/anf_ir_dump.h" +#include "session/anf_runtime_algorithm.h" +#include "operator/ops.h" +#include "device/kernel_info.h" +#include "utils/context/ms_context.h" + +namespace mindspore { +namespace opt { +namespace { +const int8_t MAX_PATTERN_SIZE = 7; +const int8_t MIN_PATTERN_SIZE = 2; +const int8_t ELTWISE_INPUT_SIZE = 2; +const int8_t ELTWISE_USE = 1; +const int8_t MULTI_ELTWISE_USE = 2; +const int8_t MAX_MULTI_ELTWISE_SIZE = 4; +const int8_t MAX_PURE_BUFFER_SUCC_SIZE = 3; +constexpr auto kOpAttrFusionId = "fusion_id"; + +#ifdef DEBUG +std::string GetFusionTypeName(const kernel::FusionType &type) { + switch (type) { + case kernel::FusionType::COMMREDUCE: + return "COMMREDUCE"; + case kernel::FusionType::SEGMENT: + return "SEGMENT"; + case kernel::FusionType::ELEMWISE: + return "ELEMWISE"; + case kernel::FusionType::CONVLUTION: + return "CONVLUTION"; + case kernel::FusionType::OPAQUE: + return "OPAQUE"; + default: + return "OPAQUE"; + } +} + +void DumpFusionScopeInfo(const kernel::FusionScopeInfo &info) { + MS_LOG(INFO) << "=== Dump FusionScopeInfo start id: " << info.scope_id; + for (auto &node : info.input_nodes) { + MS_LOG(INFO) << "=== Input: " << node->DebugString(); + } + for (auto &node : info.output_nodes) { + MS_LOG(INFO) << "=== Output: " << node->DebugString(); + } + for (auto &node : info.compute_nodes) { + MS_LOG(INFO) << "=== 
Compute: (" << node->DebugString() << ")-(" << GetFusionTypeName(AnfAlgo::GetFusionType(node)) + << ")"; + } + MS_LOG(INFO) << "=== Dump FusionScopeInfo end"; +} +#endif +CNodePtr CreateFusionOp(const std::vector &inputs_list, const std::vector &outputs_list, + const std::vector &anf_nodes, session::KernelGraph *kernel_graph) { + MS_LOG(DEBUG) << "Start Create FusionOp Kernel"; + MS_EXCEPTION_IF_NULL(kernel_graph); + std::string fusion_op_name = "FusionOp"; + for (auto node : anf_nodes) { + fusion_op_name += '_' + AnfAlgo::GetCNodeName(node); + } + auto fusion_op = std::make_shared(fusion_op_name); + MS_EXCEPTION_IF_NULL(fusion_op); + + std::vector input_names; + for (uint8_t i = 0; i < inputs_list.size(); i++) { + input_names.emplace_back("input" + std::to_string(i)); + } + std::vector output_names; + for (uint8_t i = 0; i < outputs_list.size(); i++) { + output_names.emplace_back("output" + std::to_string(i)); + } + + ValuePtr input_names_v = MakeValue(input_names); + ValuePtr output_names_v = MakeValue(output_names); + fusion_op->set_attr("input_names", input_names_v); + fusion_op->set_attr("output_names", output_names_v); + std::vector fusion_inputs_list = inputs_list; + auto value_node = std::make_shared(fusion_op); + (void)fusion_inputs_list.insert(fusion_inputs_list.begin(), value_node); + auto buffer_fusion_kernel = kernel_graph->NewCNode(fusion_inputs_list); + if (buffer_fusion_kernel == nullptr) { + MS_LOG(EXCEPTION) << "New FusionOp kernel failed!"; + } + buffer_fusion_kernel->set_scope((anf_nodes.back())->scope()); + + return buffer_fusion_kernel; +} + +kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector &inputs_list, + const std::vector &outputs_list) { + MS_LOG(DEBUG) << "Start Create Kernel Info"; + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + // inputs format and data type + std::vector inputs_format; + std::vector inputs_data_type; + for (const auto &input : inputs_list) { + auto real_input = 
AnfAlgo::VisitKernel(input, 0); + inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second)); + inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second)); + } + // outputs format and data type + std::vector outputs_format; + std::vector outputs_data_type; + for (const auto &output : outputs_list) { + if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { + auto tuple_getitem = output->cast(); + MS_EXCEPTION_IF_NULL(tuple_getitem); + outputs_format.push_back(AnfAlgo::GetOutputFormat( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + } else { + outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); + } + } + builder.SetInputsFormat(inputs_format); + builder.SetInputsDeviceType(inputs_data_type); + builder.SetOutputsFormat(outputs_format); + builder.SetOutputsDeviceType(outputs_data_type); + builder.SetKernelType(KernelType::TBE_KERNEL); + return builder.Build(); +} + +AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph, + size_t output_index) { + MS_EXCEPTION_IF_NULL(kernel_graph); + std::vector tuple_getitem_inputs_list; + auto value = std::make_shared(prim::kPrimTupleGetItem); + MS_EXCEPTION_IF_NULL(value); + auto idx = NewValueNode(SizeToInt(output_index)); + MS_EXCEPTION_IF_NULL(idx); + int temp = SizeToInt(output_index); + auto imm = std::make_shared(temp); + auto abstract_scalar = std::make_shared(imm); + idx->set_abstract(abstract_scalar); + tuple_getitem_inputs_list.push_back(value); + tuple_getitem_inputs_list.push_back(buffer_fusion_kernel); + tuple_getitem_inputs_list.push_back(idx); + auto tuple_item = 
kernel_graph->NewCNode(tuple_getitem_inputs_list); + MS_EXCEPTION_IF_NULL(tuple_item); + AnfAlgo::SetOutputInferTypeAndShape({AnfAlgo::GetOutputInferDataType(buffer_fusion_kernel, output_index)}, + {AnfAlgo::GetOutputInferShape(buffer_fusion_kernel, output_index)}, + tuple_item.get()); + return tuple_item; +} + +void ReplaceInputNodeInOtherFusionScope(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const AnfNodePtr &output_item, + const AnfNodePtr &replace_item) { + for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { + auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), + output_item); + if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { + MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; + *itr = replace_item; + } + } +} + +void ReplaceOldNode(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; + if (buffer_fusion_info.outputs_list.size() == 1) { // single output + (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); + ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], + buffer_fusion_kernel); + } else { // multiple output + for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { + auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); + (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); + ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], + tuple_item); + } + } +} + +void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, + std::unordered_map 
*buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto nodes = TopoSort(kernel_graph->get_return()); + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (!node->isa()) { + continue; + } + auto cnode = node->cast(); + if (AnfAlgo::IsRealCNodeKernel(cnode) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { + auto fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); + (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(cnode); + } + } +} + +void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + auto cnode = node->cast(); + for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) { + auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0); + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) == + fusion_info.anf_nodes.end()) { + if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(), + (*buffer_fusion_infos)[fusion_id].inputs_list.end(), + cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) { + (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx)); + } + } + } + } + } +} + +bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) { + MS_EXCEPTION_IF_NULL(node1); + MS_EXCEPTION_IF_NULL(node2); + auto getitem1 = node1->cast(); + auto getitem2 = node2->cast(); + MS_EXCEPTION_IF_NULL(getitem1); + MS_EXCEPTION_IF_NULL(getitem2); + auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); + auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); + return output_idx1 < output_idx2; +} + +void 
GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + if (AnfAlgo::GetOutputTensorNum(node) == 1) { + for (auto use_node : manager->node_users()[node]) { + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); + break; + } + } + } else { + int prev_idx = 0; + std::vector tuple_getitem_nodes; + std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), + std::back_inserter(tuple_getitem_nodes), + [](const std::pair &use_node) { return use_node.first; }); + std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare); + for (auto getitem : tuple_getitem_nodes) { + auto getitem_ptr = getitem->cast(); + auto input2 = getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { + auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); + } + prev_idx = output_idx + 1; + for (auto item_use_node : manager->node_users()[getitem]) { + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); + break; + } + } + } + } + } + } +} + +void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector &outputs_list, + const AnfNodePtr &fusion_kernel) { 
+ MS_EXCEPTION_IF_NULL(kernel_graph); + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + for (size_t idx = 0; idx < outputs_list.size(); ++idx) { + auto output = outputs_list[idx]; + if (output->isa() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { + auto real_output = AnfAlgo::VisitKernel(output, 0); + auto output_cnode = output->cast(); + MS_EXCEPTION_IF_NULL(output_cnode); + auto input2 = output_cnode->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + session::AnfWithOutIndex out_pair(real_output.first, output_idx); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } + } else { + session::AnfWithOutIndex out_pair(output, 0); + if (kernel_graph->IsInRefOutputMap(out_pair)) { + auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair); + session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx); + kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair); + } + } + } +} +} // namespace + +void UbPatternFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) const { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos); + GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + buffer_fusion_info.second.kernel_build_info = + CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list); + } +} + +bool UbPatternFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const { + MS_EXCEPTION_IF_NULL(kernel_graph); + bool change = false; + std::unordered_map buffer_fusion_infos; + 
buffer_fusion_infos.clear(); + GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); + + std::vector fusion_scope_infos; + for (auto &buffer_fusion_info : buffer_fusion_infos) { + mindspore::kernel::FusionScopeInfo fusion_scope_info; + fusion_scope_info.scope_id = buffer_fusion_info.first; + fusion_scope_info.input_nodes = buffer_fusion_info.second.inputs_list; + fusion_scope_info.compute_nodes = buffer_fusion_info.second.anf_nodes; + fusion_scope_info.output_nodes = buffer_fusion_info.second.outputs_list; + fusion_scope_infos.push_back(fusion_scope_info); +#ifdef DEBUG + DumpFusionScopeInfo(fusion_scope_info); +#endif + } + auto kernel_mods = mindspore::kernel::KernelFusion(fusion_scope_infos); + std::vector fusion_ids; + for (auto &buffer_fusion_info : buffer_fusion_infos) { + MS_LOG(DEBUG) << "anf node size: " << buffer_fusion_info.second.anf_nodes.size() + << ", inputs_list size: " << buffer_fusion_info.second.inputs_list.size() + << ", outputs list size: " << buffer_fusion_info.second.outputs_list.size(); + fusion_ids.push_back(buffer_fusion_info.first); + } + // Replace fusion op from return to head + std::sort(fusion_ids.begin(), fusion_ids.end()); + for (auto &fusion_id : fusion_ids) { + // Get kernel mod when supporting tbe + if (kernel_mods.find(fusion_id) == kernel_mods.end() || kernel_mods[fusion_id] == nullptr) { + MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; + continue; + } + change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); + } + MS_LOG(DEBUG) << "End Buffer Fusion"; + return change; +} + +bool UbPatternFusion::ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, + session::KernelGraph *kernel_graph) const { + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; + auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, + buffer_fusion_info.anf_nodes, 
kernel_graph); + AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); + // Set abstract of fusion_op node + std::vector types; + std::vector> shapes; + for (const auto &out_node : buffer_fusion_info.outputs_list) { + for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(out_node); ++idx) { + types.push_back(AnfAlgo::GetOutputInferDataType(out_node, idx)); + shapes.push_back(AnfAlgo::GetOutputInferShape(out_node, idx)); + } + } + if (types.empty() || shapes.empty()) { + MS_LOG(WARNING) << "buffer_fusion_info.outputs_list is empty"; + return false; + } + AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); + AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); + SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion); + ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); + return true; +} + +bool UbPatternFusion::Run(const FuncGraphPtr &graph) { + bool changed = false; + MS_EXCEPTION_IF_NULL(graph); + auto kernel_graph = graph->cast>(); + MS_EXCEPTION_IF_NULL(kernel_graph); + changed = FuseBufferFusionPattern(kernel_graph.get()); + // clear fusion_id attr + for (auto &node : graph->nodes()) { + if (node != nullptr && node->isa()) { + AnfAlgo::EraseNodeAttr(kAttrFusionId, node); + } + } + return changed; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h similarity index 79% rename from mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h rename to mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h index 09f62c3dddd3c8a3141d6f3332757e19527176ab..4c2f91472e81679205145c146da5c3c82923f6ce 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/tbe_buffer_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h @@ -1,50 +1,50 @@ -/** - * 
Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ -#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ -#include -#include -#include - -#include "ir/anf.h" -#include "pre_activate/common/pass.h" -#include "pre_activate/common/fusion_id_allocator.h" -#include "device/kernel_info.h" -#include "kernel/kernel.h" -#include "session/kernel_graph.h" -#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" - -namespace mindspore { -namespace opt { -using FusedNodeRecord = std::vector>; - -class TbeBufferFusion : public Pass { - public: - TbeBufferFusion() : Pass("TbeBufferFusion") {} - ~TbeBufferFusion() override = default; - bool Run(const FuncGraphPtr &graph) override; - - private: - void GetBufferFusionInfo(session::KernelGraph *kernel_graph, - std::unordered_map *buffer_fusion_infos) const; - bool ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, - const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; - bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; -}; -} // namespace opt -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_TBE_BUFFER_FUSION_H_ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_ +#include +#include +#include + +#include "ir/anf.h" +#include "pre_activate/common/pass.h" +#include "pre_activate/common/fusion_id_allocator.h" +#include "device/kernel_info.h" +#include "kernel/kernel.h" +#include "session/kernel_graph.h" +#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" + +namespace mindspore { +namespace opt { +using FusedNodeRecord = std::vector>; + +class UbPatternFusion : public Pass { + public: + UbPatternFusion() : Pass("TbeBufferFusion") {} + ~UbPatternFusion() override = default; + bool Run(const FuncGraphPtr &graph) override; + + private: + void GetBufferFusionInfo(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) const; + bool ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; + bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_UB_PATTERN_FUSION_H_