diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc index 9ec20b3fbb9a75b10c8dc2d81db3a5a7019edd07..939e7146e625ac290c0f67c6d27b841fd012cca3 100644 --- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc @@ -722,8 +722,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode std::vector *output_desc_list) { auto output_size = AnfAlgo::GetOutputTensorNum(cnode); if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { - // wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); - auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr(cnode, kAttrOutputUsedNum))}; + auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); if (output_used_nums.size() != output_size) { MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc index 58b8a93516b5cb565b43841bddb8342f8050a21f..abacb9137d15ed7b5c059f8ff9f089a204164967 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -282,11 +283,17 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector outputs_format; std::vector outputs_data_type; - for (size_t index = 0; index < outputs_list.size(); ++index) { - for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { - auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx); - outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second)); - outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second)); + for (const auto &output : outputs_list) { + if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) { + auto tuple_getitem = output->cast(); + MS_EXCEPTION_IF_NULL(tuple_getitem); + outputs_format.push_back(AnfAlgo::GetOutputFormat( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType( + tuple_getitem->input(1), IntToSize(GetValue(GetValueNode(tuple_getitem->input(2)))))); + } else { + outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0)); + outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0)); } } builder.SetInputsFormat(inputs_format); @@ -320,32 +327,35 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K return tuple_item; } -void ReplaceOldNode(const std::vector &outputs_list, const AnfNodePtr &buffer_fusion_kernel, - session::KernelGraph *kernel_graph) { +void ReplaceInputNodeInOtherFusionScope(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const AnfNodePtr &output_item, + const AnfNodePtr &replace_item) { + for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) { + auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(), + output_item); + if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) { + MS_LOG(DEBUG) << "replace input of other pattern, id = " << id; + *itr = replace_item; + } + } +} + +void ReplaceOldNode(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto manager = kernel_graph->manager(); MS_EXCEPTION_IF_NULL(manager); - if (outputs_list.size() == 1) { // single output - (void)manager->Replace(outputs_list[0], buffer_fusion_kernel); + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; + if (buffer_fusion_info.outputs_list.size() == 1) { // single output + (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel); + ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0], + buffer_fusion_kernel); } else { // multiple output - size_t real_idx = 0; - for (size_t index = 0; index < outputs_list.size(); ++index) { - if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) { - auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); - (void)manager->Replace(outputs_list[index], tuple_item); - } else { - std::vector make_tuple_inputs; - AbstractBasePtrList abstract_list; - make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); - for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) { - auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++); - abstract_list.push_back(tuple_item->abstract()); - make_tuple_inputs.push_back(tuple_item); - } - AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs); - make_tuple->set_abstract(std::make_shared(abstract_list)); - (void)manager->Replace(outputs_list[index], make_tuple); - } + for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) { + auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index); + (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item); + ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index], + tuple_item); } } } @@ -406,38 +416,67 @@ void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id, } } -void InsertNode(const AnfNodePtr &node, std::vector *list) { - MS_EXCEPTION_IF_NULL(list); - if (std::find(list->begin(), list->end(), node) == list->end()) { - (void)list->insert(list->end(), node); +void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(buffer_fusion_infos); + auto nodes = TopoSort(kernel_graph->get_return()); + for (auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { + auto fusion_id = AnfAlgo::GetNodeAttr(node, kOpAttrFusionId); + (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); + } } } -void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(node); +void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph, + std::unordered_map *buffer_fusion_infos) { + MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - for (auto &input : node->inputs()) { - MS_EXCEPTION_IF_NULL(input); - if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input, kOpAttrFusionId); - if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; - } - if (fusion_id != cur_fusion_id) { - InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list)); - } - } else if (input->isa()) { - for (auto &input_in : input->cast()->inputs()) { - if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) { - auto fusion_id = AnfAlgo::GetNodeAttr(input_in, kOpAttrFusionId); - if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) { - BufferFusionInfo_t buffer_fusion_info; - (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info; + auto manager = kernel_graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + + for (auto &buffer_fusion_info : *buffer_fusion_infos) { + auto fusion_id = buffer_fusion_info.first; + auto fusion_info = buffer_fusion_info.second; + for (const auto &node : fusion_info.anf_nodes) { + if (AnfAlgo::GetOutputTensorNum(node) == 1) { + for (auto use_node : manager->node_users()[node]) { + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node); + break; + } + } + } else { + int prev_idx = 0; + std::vector tuple_getitem_nodes; + std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(), + std::back_inserter(tuple_getitem_nodes), + [](const std::pair &use_node) { return use_node.first; }); + std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), + [](const AnfNodePtr &node1, const AnfNodePtr &node2) { + auto getitem1 = node1->cast(); + auto getitem2 = node2->cast(); + auto output_idx1 = GetValue(GetValueNode(getitem1->input(2))); + auto output_idx2 = GetValue(GetValueNode(getitem2->input(2))); + return output_idx1 < output_idx2; + }); + for (auto getitem : tuple_getitem_nodes) { + auto getitem_ptr = getitem->cast(); + auto input2 = getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) { + auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx)); + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node); } - if (fusion_id != cur_fusion_id) { - InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list)); + prev_idx = output_idx + 1; + for (auto item_use_node : manager->node_users()[getitem]) { + if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) == + fusion_info.anf_nodes.end()) { + (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem); + break; + } } } } @@ -445,15 +484,72 @@ void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id } } -void GetFusionScopeNodeList(const session::KernelGraph &kernel_graph, - std::unordered_map *buffer_fusion_infos) { - MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - auto nodes = TopoSort(kernel_graph.get_return()); - for (auto &node : nodes) { - MS_EXCEPTION_IF_NULL(node); - if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) { - auto fusion_id = AnfAlgo::GetNodeAttr(node, kOpAttrFusionId); - (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node); +void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto conv = cnode->input(1); + if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { + std::vector output_used_num{SizeToInt(manager->node_users()[conv].size())}; + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv); + std::unordered_set record{cnode, conv}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); + } +} + +void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto getitem = relu_input->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); + } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, bnupdate}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); + } +} + +void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph, + std::unordered_set *fused_set, FusedNodeRecord *candidate_fusion) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(fused_set); + MS_EXCEPTION_IF_NULL(candidate_fusion); + auto manager = kernel_graph.manager(); + MS_EXCEPTION_IF_NULL(manager); + auto add = relu_input->cast(); + MS_EXCEPTION_IF_NULL(add); + auto tuple_getitem = add->input(1); + if (tuple_getitem->isa() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) { + auto getitem = tuple_getitem->cast(); + auto bnupdate = getitem->input(1); + if (bnupdate->isa() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) { + std::vector output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0); + for (auto out_getitem : manager->node_users()[bnupdate]) { + auto out_getitem_ptr = out_getitem.first->cast(); + auto input2 = out_getitem_ptr->input(2); + auto output_idx = GetValue(GetValueNode(input2)); + output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size()); + } + AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate); + std::unordered_set record{cnode, relu_input, bnupdate}; + candidate_fusion->push_back(record); + fused_set->insert(record.begin(), record.end()); } } } @@ -470,15 +566,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) { - auto conv = cnode->input(1); - if (conv->isa() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) { - auto manager = kernel_graph.manager(); - MS_EXCEPTION_IF_NULL(manager); - auto &users = manager->node_users(); - AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv); - std::unordered_set record({cnode, conv}); - candidate_fusion->push_back(record); - fused_set->insert(record.begin(), record.end()); + MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion); + } else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName || + AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) { + auto relu_input = cnode->input(1); + if (relu_input->isa() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) { + MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); + } else if (relu_input->isa() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) { + MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion); } } } @@ -536,27 +631,23 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord } } // namespace -void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph, +void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const { MS_EXCEPTION_IF_NULL(buffer_fusion_infos); - std::vector node_list = TopoSort(kernel_graph.get_return()); + std::vector node_list = TopoSort(kernel_graph->get_return()); for (auto &node : node_list) { if (!AnfAlgo::IsRealCNodeKernel(node)) { continue; } - - int32_t cur_fusion_id = -1; auto cnode = node->cast(); MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) { - cur_fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); + auto cur_fusion_id = AnfAlgo::GetNodeAttr(cnode, kOpAttrFusionId); CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos); } - // Check if current node is output - CheckCurrentNodeIsOutput(cnode, cur_fusion_id, buffer_fusion_infos); } - - GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos); + GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos); for (auto &buffer_fusion_info : *buffer_fusion_infos) { buffer_fusion_info.second.kernel_build_info = CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list, @@ -569,7 +660,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c bool change = false; std::unordered_map buffer_fusion_infos; buffer_fusion_infos.clear(); - GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos); + GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos); std::vector fusion_scope_infos; for (auto &buffer_fusion_info : buffer_fusion_infos) { @@ -600,7 +691,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed"; continue; } - change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph); + change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph); } MS_LOG(DEBUG) << "End Buffer Fusion"; return change; @@ -630,8 +721,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g return true; } -bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, +bool BufferFusion::ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, + int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const { + auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id]; auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list, buffer_fusion_info.anf_nodes, kernel_graph); AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get()); @@ -651,7 +744,7 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get()); AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get()); // replace node - ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph); + ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph); return true; } diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h index c54fd0cd97d4f140c53a578a9f3c665ce1fb5ffc..9bed7217dd18c0a6adac6187a9612d10c9e07e94 100644 --- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h +++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h @@ -44,10 +44,10 @@ class BufferFusion : public Pass { bool Run(const FuncGraphPtr &graph) override; private: - void GetBufferFusionInfo(const session::KernelGraph &kernel_graph, + void GetBufferFusionInfo(session::KernelGraph *kernel_graph, std::unordered_map *buffer_fusion_infos) const; - bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr, - session::KernelGraph *kernel_graph) const; + bool ReplaceFusionOp(std::unordered_map *buffer_fusion_infos, int32_t fusion_id, + const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const; bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const; bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const; }; diff --git a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc deleted file mode 100644 index 9807344139e9ea67b6b8cc56ff254ad5fee773dd..0000000000000000000000000000000000000000 --- a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc +++ /dev/null @@ -1,1298 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -#include "common/common_test.h" -#include "common/py_func_graph_fetcher.h" - -#include "ir/anf.h" -#include "ir/func_graph_cloner.h" -#include "utils/context/ms_context.h" -#include "debug/draw.h" -#include "debug/anf_ir_dump.h" -#include "operator/ops.h" -#include "utils/utils.h" -#include "kernel/tbe/tbe_kernel_mod.h" -#include "session/kernel_graph.h" -#include "device/kernel_info.h" -#include "session/anf_runtime_algorithm.h" -#include "pre_activate/common/pattern_engine.h" -#define private public -#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" - -namespace mindspore { -namespace opt { -using Primitive = mindspore::Primitive; -using session::KernelGraph; -using KernelGraphPtr = std::shared_ptr; -using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder; -class TestHWBufferFusion : public UT::Common { - public: - TestHWBufferFusion() : getPyFun_("gtest_input.pre_activate.hw_opt_test", true) {} - - public: - UT::PyFuncGraphFetcher getPyFun_; -}; - -static KernelGraphPtr CreateKernelGraphForBufferFusionMultipleIn( - uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string name = ""; - - // Construct first node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(z_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - g->MutableInputs()->push_back(y_const); - g->MutableInputs()->push_back(z_const); - - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(z_const); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - int layer_idx = 0; - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - if (layer_idx == 0) { - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - x_tensor->set_device_info(device_info); - - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_tensor->ToAbstract()); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - inputs.push_back(x_const); - } else { - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (layer_idx == 0) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - layer_idx++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - - g->set_return(ret); - - draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter( - uint32_t before_layers, uint32_t after_layers = 3, - mindspore::kernel::FusionType fusiontype = mindspore::kernel::SEGMENT) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string name = ""; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - while (before_layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - if (layerscount == 1) { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(x_const); - } else { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // Construct the conv2d node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } else { - auto p_red_seg = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_red_seg->set_attr("input_names", input_names_v); - p_red_seg->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_red_seg)); - inputs.push_back(ptr_formerlayer); - name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot"; - } - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - g->set_return(ret); - draw::Draw(name, g); - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusionSingleIn( - uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - // build the func_graph manually, eg: - /* CreateKernelGraphForBufferFusionSingleIn(1) - * @mindspore - * def f(x): - * z=conv2d(x, y) - * ret=relu(z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - - std::vector shp = {1, 3, 3, 4}; - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - std::string name = ""; - - // Construct first node - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(z_tensor); - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - g->MutableInputs()->push_back(y_const); - g->MutableInputs()->push_back(z_const); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(z_const); - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } else { - auto p_red_seg = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_red_seg->set_attr("input_names", input_names_v); - p_red_seg->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_red_seg)); - inputs.push_back(y_const); - name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot"; - } - - auto kernelptr_first = g->NewCNode(inputs); - kernelptr_first->set_abstract(y_tensor->ToAbstract()); - kernelptr_first->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get()); - ptr_formerlayer = kernelptr_first; - - // configure fusion successor layers - while (after_layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(y_tensor->ToAbstract()); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(y_tensor->ToAbstract()); - - g->set_return(ret); - - draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForBufferFusion( - uint32_t targetlayers, bool conv_flag = false, - mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) { - // build the func_graph manually, eg: - /* CreateKernelGraphForBufferFusion(3) - * @mindspore - * def f(x): - * y=relu(x) - * z=relu(y) - * ret=relu(z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - uint32_t layerscount = 1; - CNodePtr ptr_formerlayer; - // configure func_graph hiden layers - while (targetlayers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - if (layerscount == 1) { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(x_const); - } else { - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - layerscount++; - } - std::string name = "test_construct_" + std::to_string(layerscount) + "layers_graph.dot"; - if (conv_flag) { - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(y_tensor); - - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - - g->MutableInputs()->push_back(y_const); - - if (fusiontype == kernel::FusionType::CONVLUTION) { - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - } else { - auto p_conv = std::make_shared("ReduceOrSegment"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(ptr_formerlayer); - } - - auto kernelptr_conv = g->NewCNode(inputs); - kernelptr_conv->set_abstract(x_abstract); - kernelptr_conv->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - if (fusiontype == kernel::FusionType::CONVLUTION) { - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - } - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get()); - ptr_formerlayer = kernelptr_conv; - name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot"; - } - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(ptr_formerlayer); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - - draw::Draw(name, g); - - return g; -} - -CNodePtr CreateKernelGraphBranch(KernelGraphPtr g, CNodePtr inputptr, int layers, - const kernel::FusionType fusiontype = kernel::FusionType::CONVLUTION) { - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - - CNodePtr ptr_formerlayer = inputptr; - while (layers--) { - auto p_relu = std::make_shared("ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - std::vector inputs; - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(ptr_formerlayer); - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get()); - ptr_formerlayer = kernelptr_floor; - } - - tensor::TensorPtr y_tensor = std::make_shared(kFloat32->type_id(), shp); - y_tensor->set_device_info(device_info); - tensor::TensorPtr z_tensor = std::make_shared(kFloat32->type_id(), shp); - z_tensor->set_device_info(device_info); - auto y_const = NewValueNode(y_tensor); - auto z_const = NewValueNode(y_tensor); - - y_const->set_abstract(y_tensor->ToAbstract()); - z_const->set_abstract(z_tensor->ToAbstract()); - - g->MutableInputs()->push_back(y_const); - - auto p_conv = std::make_shared("Conv2D"); - std::vector input_names = {"x", "y"}; - std::vector output_names = {"output"}; - - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_conv->set_attr("input_names", input_names_v); - p_conv->set_attr("output_names", output_names_v); - - std::vector inputs; - inputs.clear(); - inputs.push_back(NewValueNode(p_conv)); - inputs.push_back(y_const); - inputs.push_back(ptr_formerlayer); - - auto kernelptr_conv = g->NewCNode(inputs); - kernelptr_conv->set_abstract(x_abstract); - kernelptr_conv->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(fusiontype); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get()); - return kernelptr_conv; -} - -static KernelGraphPtr CreateKernelGraphForMultiUse(uint32_t targetlayer1s, uint32_t targetlayer2s) { - /* @mindspore - * def f(x): - * multi_use=relu(x) - * y=relu(multi_use) - * z=relu(multi_use) - * ret=relu(y, z) - * return ret - */ - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - - g->MutableInputs()->push_back(x_const); - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(x_const); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s); - CNodePtr outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s); - - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - inputs.push_back(outptrbranch2); - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetOutputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} -#ifdef BUFFER_FUSION_MULTI_OUT -static KernelGraphPtr CreateKernelGraphForMultiOutputWithLinearInput( - uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true, - const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - auto p_relu0 = std::make_shared("ReLU6"); - std::vector input_names0 = {"x"}; - std::vector output_names0 = {"output"}; - ValuePtr input_names_v0 = MakeValue(input_names0); - ValuePtr output_names_v0 = MakeValue(output_names0); - p_relu0->set_attr("input_names", input_names_v0); - p_relu0->set_attr("output_names", output_names_v0); - inputs.clear(); - inputs.push_back(NewValueNode(p_relu0)); - inputs.push_back(x_const); - auto kernelptr_floor0 = g->NewCNode(inputs); - kernelptr_floor0->set_abstract(x_abstract); - kernelptr_floor0->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder0; - builder0.SetInputsFormat({kOpFormat_NCHW}); - builder0.SetOutputsFormat({kOpFormat_NCHW}); - builder0.SetInputsDeviceType({kFloat32->type_id()}); - builder0.SetOutputsDeviceType({kFloat32->type_id()}); - builder0.SetKernelType(KernelType::TBE_KERNEL); - builder0.SetFusionType(kernel::FusionType::ELEMWISE); - builder0.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder0.Build(), kernelptr_floor0.get()); - CNodePtr ptr_formerlayer; - ptr_formerlayer = kernelptr_floor0; - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(ptr_formerlayer); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat16->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch2 = nullptr; - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type); - if (use_flag) { - outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type); - } - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - if (use_flag) { - inputs.push_back(outptrbranch2); - } - - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - if (use_flag) { - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder1.SetInputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id()}); - } - builder1.SetOutputsFormat({kOpFormat_NCHW}); - builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - cout << "built two branches done" << endl; - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} - -static KernelGraphPtr CreateKernelGraphForMultiOutput( - uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true, - const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) { - KernelGraphPtr g = std::make_shared(); - std::vector inputs; - // x is input tensor. - std::vector shp = {1, 3, 3, 4}; - tensor::TensorPtr x_tensor = std::make_shared(kFloat32->type_id(), shp); - TensorTypePtr tensor_type = std::make_shared(kFloat32); - tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type}; - x_tensor->set_device_info(device_info); - - auto x_abstract = x_tensor->ToAbstract(); - auto x_const = NewValueNode(x_tensor); - x_const->set_abstract(x_abstract); - g->MutableInputs()->push_back(x_const); - - auto p_multi = std::make_shared("MULTI_USE_ReLU6"); - std::vector input_names = {"x"}; - std::vector output_names = {"output"}; - ValuePtr input_names_v = MakeValue(input_names); - ValuePtr output_names_v = MakeValue(output_names); - p_multi->set_attr("input_names", input_names_v); - p_multi->set_attr("output_names", output_names_v); - inputs.clear(); - inputs.push_back(NewValueNode(p_multi)); - inputs.push_back(x_const); - auto kernelptr_multi = g->NewCNode(inputs); - kernelptr_multi->set_abstract(x_abstract); - kernelptr_multi->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder; - builder.SetInputsFormat({kOpFormat_NCHW}); - builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder.SetInputsDeviceType({kFloat32->type_id()}); - builder.SetOutputsDeviceType({kFloat16->type_id(), kFloat32->type_id()}); - builder.SetKernelType(KernelType::TBE_KERNEL); - builder.SetFusionType(kernel::FusionType::ELEMWISE); - builder.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get()); - - CNodePtr outptrbranch2 = nullptr; - CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type); - if (use_flag) { - outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type); - } - auto p_relu = std::make_shared("ReLU6"); - input_names = {"x"}; - output_names = {"output"}; - input_names_v = MakeValue(input_names); - output_names_v = MakeValue(output_names); - p_relu->set_attr("input_names", input_names_v); - p_relu->set_attr("output_names", output_names_v); - - inputs.clear(); - inputs.push_back(NewValueNode(p_relu)); - inputs.push_back(outptrbranch1); - if (use_flag) { - inputs.push_back(outptrbranch2); - } - auto kernelptr_floor = g->NewCNode(inputs); - kernelptr_floor->set_abstract(x_abstract); - kernelptr_floor->set_kernel_info(std::make_shared()); - KernelBuildInfoBuilder builder1; - if (use_flag) { - builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()}); - } else { - builder1.SetInputsFormat({kOpFormat_NCHW}); - builder1.SetInputsDeviceType({kFloat32->type_id()}); - } - builder1.SetOutputsFormat({kOpFormat_NCHW}); - builder1.SetOutputsDeviceType({kFloat32->type_id()}); - builder1.SetKernelType(KernelType::TBE_KERNEL); - builder1.SetFusionType(kernel::FusionType::ELEMWISE); - builder1.SetProcessor(kernel::Processor::AICORE); - AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get()); - - // return res - auto p_return = std::make_shared("return"); - inputs.clear(); - inputs.push_back(NewValueNode(p_return)); - inputs.push_back(kernelptr_floor); - auto ret = g->NewCNode(inputs); - ret->set_abstract(x_abstract); - - g->set_return(ret); - string name = "multi_use_graph.dot"; - draw::Draw(name, g); - - return g; -} -#endif -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(1); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn1.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 8); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn1.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn2) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn2.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 10); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn2.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn3) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(3); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionlayerSingleIn3.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 12); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionlayerSingleIn3.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 6); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer2) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer4) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(4); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 11); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer6) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(6); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} - -TEST_F(TestHWBufferFusion, BufferFusionlayer8) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 19); - buffer_fusion.Run(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} - -TEST_F(TestHWBufferFusion, BufferFusionconv1) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1, true); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), false); -} - -TEST_F(TestHWBufferFusion, BufferFusionconv8) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8, true); - draw::Draw("before_BufferFusionconv8.dot", graph_ptr); - - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - std::vector fusion_ids; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - fusion_ids.push_back(buffer_fusion_info.first); - } - std::sort(fusion_ids.begin(), fusion_ids.end()); - for (auto &fusion_id : fusion_ids) { - buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get()); - } - draw::Draw("after_BufferFusionconv8.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 10); -} - -#ifdef BUFFER_FUSION_MULTI_OUT -TEST_F(TestHWBufferFusion, BufferFusionMultiOutWithLinearInput) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutputWithLinearInput(1, 1, true, mindspore::kernel::OPAQUE); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 19); - - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 21); -} - -TEST_F(TestHWBufferFusion, BufferFusionMultiOut) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutput(1, 1, true, mindspore::kernel::OPAQUE); - draw::Draw("before_BufferFusionMultiOut.dot", graph_ptr); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 17); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 2); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - draw::Draw("after_BufferFusionMultiOut.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 21); -} -#endif - -TEST_F(TestHWBufferFusion, BufferMultiUse) { - KernelGraphPtr graph_ptr = CreateKernelGraphForMultiUse(3, 4); - draw::Draw("before_BufferMultiUse.dot", graph_ptr); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - std::vector fusion_ids; - for (auto &buffer_fusion_info : buffer_fusion_infos) { - fusion_ids.push_back(buffer_fusion_info.first); - } - std::sort(fusion_ids.begin(), fusion_ids.end()); - for (auto &fusion_id : fusion_ids) { - buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get()); - } - draw::Draw("after_BufferMultiUse.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); -} - -TEST_F(TestHWBufferFusion, BufferFusionReduce) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::COMMREDUCE); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionSegment) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::SEGMENT); - ASSERT_TRUE(nullptr != graph_ptr); - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true); - kernel::KernelPackPtr kernel_pack = std::make_shared(); - auto kernel_ptr = std::make_shared(kernel_pack); - std::unordered_map buffer_fusion_infos; - buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos); - for (auto &buffer_fusion_info : buffer_fusion_infos) { - EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3); - EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1); - EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1); - buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get()); - } - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise1BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(1); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 13); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise2BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 15); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionEltwise3BeforeAnd3After) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(3); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 17); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 5); -} - -TEST_F(TestHWBufferFusion, BufferFusionMultipleIn) { - KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionMultipleIn(2); - ASSERT_TRUE(nullptr != graph_ptr); - draw::Draw("before_BufferFusionMultipleIn.dot", graph_ptr); - - mindspore::opt::BufferFusion buffer_fusion = BufferFusion(); - std::vector graphs{graph_ptr}; - FuncGraphManagerPtr manager = std::make_shared(graphs); - manager->AddFuncGraph(graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 11); - buffer_fusion.Run(graph_ptr); - draw::Draw("after_BufferFusionMultipleIn.dot", graph_ptr); - ASSERT_EQ(manager->all_nodes().size(), 7); -} -} // namespace opt -} // namespace mindspore