提交 2215e326 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!1419 remove old buffer fusion pass

Merge pull request !1419 from Etone.Chan/Resnet50
...@@ -62,7 +62,6 @@ ...@@ -62,7 +62,6 @@
#include "pre_activate/pass/common_subexpression_elimination.h" #include "pre_activate/pass/common_subexpression_elimination.h"
#include "pre_activate/ascend/format_type/merge_cast_to_op.h" #include "pre_activate/ascend/format_type/merge_cast_to_op.h"
#include "pre_activate/ascend/format_type/check_consistency.h" #include "pre_activate/ascend/format_type/check_consistency.h"
#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h"
#include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h" #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h"
#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h"
...@@ -317,14 +316,14 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern ...@@ -317,14 +316,14 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
optimizer->AddPassManager(other_pm); optimizer->AddPassManager(other_pm);
(void)optimizer->Optimize(kernel_graph); (void)optimizer->Optimize(kernel_graph);
kernel_graph->SetExecOrderByDefault(); kernel_graph->SetExecOrderByDefault();
// buffer fusion
AscendBackendUBFusionOptimization(kernel_graph);
if (save_graphs) { if (save_graphs) {
std::string file_path = std::string file_path =
save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir"; save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir";
DumpIR(file_path, kernel_graph, true); DumpIR(file_path, kernel_graph, true);
DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id())); DumpIRProto(kernel_graph, "after_hwopt_" + std::to_string(kernel_graph->graph_id()));
} }
// buffer fusion
AscendBackendUBFusionOptimization(kernel_graph);
} }
void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) { void AscendBackendUBFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
......
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_BUFFER_FUSION_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_BUFFER_FUSION_H_
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "ir/anf.h"
#include "pre_activate/common/pass.h"
#include "pre_activate/common/fusion_id_allocator.h"
#include "device/kernel_info.h"
#include "kernel/kernel.h"
#include "session/kernel_graph.h"
namespace mindspore {
namespace opt {
// Plain aggregate bundling the data kept for one fusion group. In this header
// it is used as the mapped value of std::unordered_map<int32_t, BufferFusionInfo_t>
// (see BufferFusion::GetBufferFusionInfo / ReplaceFusionOp), where the key is
// passed alongside as `fusion_id`.
// NOTE(review): the per-field descriptions below are inferred from the field
// names only; the code that fills them is not visible here -- confirm.
struct BufferFusionInfo_t {
// presumably the graph nodes that make up the fused group
std::vector<AnfNodePtr> anf_nodes;
// presumably the nodes feeding the fused group from outside
std::vector<AnfNodePtr> inputs_list;
// presumably the nodes of the group whose results are consumed outside it
std::vector<AnfNodePtr> outputs_list;
// kernel build info associated with the group (shared-pointer style handle type)
kernel::KernelBuildInfoPtr kernel_build_info;
};
using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
// Optimizer pass registered under the name "buffer_fusion".
// Only the declaration is visible here; the descriptions of the private
// helpers are therefore limited to what their signatures show, and anything
// beyond that is marked as an assumption to be confirmed.
class BufferFusion : public Pass {
public:
// Constructs the pass and hands the fixed pass name "buffer_fusion" to Pass.
BufferFusion() : Pass("buffer_fusion") {}
~BufferFusion() override = default;
// Pass entry point (overrides Pass::Run).
// NOTE(review): the bool result presumably reports whether the graph was
// changed -- confirm against the Pass base class.
bool Run(const FuncGraphPtr &graph) override;
private:
// Takes one set of nodes (`record`).
// NOTE(review): presumably tags every node in the set with a fusion id taken
// from `fusion_id_allocator` -- confirm in the implementation.
void SetRecordFusionId(const std::unordered_set<AnfNodePtr> &record);
// The Match* helpers below all receive the graph read-only and a non-const
// `candidate_fusion` out-parameter (a vector of node sets).
// NOTE(review): presumably each one appends a node set when its named
// pattern (e.g. Conv + BNReduce) is found starting at `cnode` -- confirm.
void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion);
void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion);
void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input,
const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion);
// NOTE(review): the meaning of `is_order` is not derivable from this header.
void MatchDepthwiseConvRelu(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion, bool is_order);
void MatchMatmulEltwise(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
FusedNodeRecord *candidate_fusion);
// Whole-graph matchers: no start node is passed, only the graph and the
// candidate list.
// NOTE(review): judging by the names, one matches by operator name and the
// other by kernel fusion type -- confirm.
void MatchOpNamePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion);
void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, FusedNodeRecord *candidate_fusion);
// Const member taking a mutable graph and an output map keyed by int32_t.
// NOTE(review): presumably fills one BufferFusionInfo_t per fusion id -- confirm.
void GetBufferFusionInfo(session::KernelGraph *kernel_graph,
std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const;
// Const member taking the info map, one `fusion_id`, a kernel module handle
// and the mutable graph.
// NOTE(review): presumably swaps the nodes of that fusion group for a single
// fused op backed by `kernel_ptr`; meaning of the bool result unverified.
bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id,
const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const;
// Non-const: may modify pass state (contrast with the const fuse step below).
bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph);
// Const: operates on the mutable graph without modifying pass state.
bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const;
// Allocator object owned by value by this pass instance.
FusionIdAllocator fusion_id_allocator;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_BUFFER_FUSION_BUFFER_FUSION_H_
...@@ -37,6 +37,13 @@ const int8_t ELTWISE_USE = 1; ...@@ -37,6 +37,13 @@ const int8_t ELTWISE_USE = 1;
const int8_t MAX_ELTWISE_SIZE = 6; const int8_t MAX_ELTWISE_SIZE = 6;
using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>; using FusedNodeRecord = std::vector<std::unordered_set<AnfNodePtr>>;
// Plain aggregate holding three node lists and a kernel build info handle.
// NOTE(review): presumably describes one fusion group (member nodes, external
// inputs, externally visible outputs); the code that fills and reads it is
// not visible in this hunk -- confirm before relying on these meanings.
struct BufferFusionInfo_t {
// presumably the nodes that belong to the fused group
std::vector<AnfNodePtr> anf_nodes;
// presumably the nodes that feed the group from outside
std::vector<AnfNodePtr> inputs_list;
// presumably the group nodes whose outputs are used outside the group
std::vector<AnfNodePtr> outputs_list;
// kernel build info handle stored with the group
kernel::KernelBuildInfoPtr kernel_build_info;
};
class FusionBasePass : public Pass { class FusionBasePass : public Pass {
public: public:
FusionBasePass(const std::string &name, FusionIdAllocatorPtr idAllocator) FusionBasePass(const std::string &name, FusionIdAllocatorPtr idAllocator)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
*/ */
#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h" #include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h"
#include <vector> #include <vector>
#include <algorithm>
#include <unordered_set> #include <unordered_set>
#include <memory> #include <memory>
#include <string> #include <string>
...@@ -51,7 +52,9 @@ void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const se ...@@ -51,7 +52,9 @@ void ReduceEltwiseFusionPass::MatchReduceEltwise(const CNodePtr &cnode, const se
if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL &&
AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::COMMREDUCE) { AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::COMMREDUCE) {
(void)record.insert(eltwise_input); (void)record.insert(eltwise_input);
auto previous_eltwise_input = cnode->input(1); auto previous_input_cnode = eltwise_input->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(previous_input_cnode);
auto previous_eltwise_input = previous_input_cnode->input(1);
auto previous_size = record.size(); auto previous_size = record.size();
while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) {
(void)record.insert(previous_eltwise_input); (void)record.insert(previous_eltwise_input);
...@@ -71,6 +74,7 @@ void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGrap ...@@ -71,6 +74,7 @@ void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGrap
FusedNodeRecord *candidate_fusion) { FusedNodeRecord *candidate_fusion) {
MS_EXCEPTION_IF_NULL(candidate_fusion); MS_EXCEPTION_IF_NULL(candidate_fusion);
std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return()); std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return());
std::reverse(node_list.begin(), node_list.end());
for (auto &node : node_list) { for (auto &node : node_list) {
if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) || if (!AnfAlgo::IsRealCNodeKernel(node) || fusion_id_allocator->HasFusionIdAttr(node) ||
AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) { AnfAlgo::CheckPrimitiveType(node, prim::kPrimReturn)) {
......
...@@ -51,7 +51,9 @@ void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const ...@@ -51,7 +51,9 @@ void SegmentEltwiseFusionPass::MatchSegmentEltwise(const CNodePtr &cnode, const
if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL && if (AnfAlgo::GetKernelType(eltwise_input) == KernelType::TBE_KERNEL &&
AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::SEGMENT) { AnfAlgo::GetFusionType(eltwise_input) == kernel::FusionType::SEGMENT) {
(void)record.insert(eltwise_input); (void)record.insert(eltwise_input);
auto previous_eltwise_input = cnode->input(1); auto previous_input_cnode = eltwise_input->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(previous_input_cnode);
auto previous_eltwise_input = previous_input_cnode->input(1);
auto previous_size = record.size(); auto previous_size = record.size();
while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) { while (CheckEltWiseNode(manager.get(), previous_eltwise_input)) {
(void)record.insert(previous_eltwise_input); (void)record.insert(previous_eltwise_input);
......
...@@ -19,13 +19,13 @@ ...@@ -19,13 +19,13 @@
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "pre_activate/ascend/buffer_fusion/fusion_base_pass.h"
#include "ir/anf.h" #include "ir/anf.h"
#include "pre_activate/common/pass.h" #include "pre_activate/common/pass.h"
#include "pre_activate/common/fusion_id_allocator.h" #include "pre_activate/common/fusion_id_allocator.h"
#include "device/kernel_info.h" #include "device/kernel_info.h"
#include "kernel/kernel.h" #include "kernel/kernel.h"
#include "session/kernel_graph.h" #include "session/kernel_graph.h"
#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h"
namespace mindspore { namespace mindspore {
namespace opt { namespace opt {
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/pass/remove_nop_nodes.h"
#include "common/utils.h"
#include "pre_activate/common/helper.h"
namespace mindspore {
namespace opt {
// Returns input(1) of `node` when `node` is a CNode for which IsNopNode(node)
// holds; returns nullptr for a null node, a non-CNode, or a non-nop node.
// The FuncGraphPtr and EquivPtr arguments are unused.
// NOTE(review): presumably the pass framework treats nullptr as "leave the
// node alone" and otherwise substitutes the returned node for `node`, and
// input(1) is presumably the first real operand -- confirm against
// PatternProcessPass and the CNode input layout.
const AnfNodePtr RemoveNopNodes::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const {
  const bool is_cnode = (node != nullptr) && node->isa<CNode>();
  if (is_cnode) {
    auto nop_candidate = node->cast<CNodePtr>();
    // Raises through the project macro if the cast unexpectedly yields null.
    MS_EXCEPTION_IF_NULL(nop_candidate);
    if (IsNopNode(node)) {
      return nop_candidate->input(1);
    }
  }
  return nullptr;
}
} // namespace opt
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REMOVE_NOP_NODES_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REMOVE_NOP_NODES_H_
#include "ir/anf.h"
#include "pre_activate/common/optimizer.h"
namespace mindspore {
namespace opt {
// Pattern-based optimizer pass registered under the name "remove_nop_nodes".
// Its out-of-line Process definition returns input(1) of a CNode for which
// IsNopNode is true and nullptr in every other case.
class RemoveNopNodes : public PatternProcessPass {
public:
// `multigraph` is forwarded unchanged to PatternProcessPass and defaults to
// true; `explicit` prevents implicit conversion from bool.
explicit RemoveNopNodes(bool multigraph = true) : PatternProcessPass("remove_nop_nodes", multigraph) {}
~RemoveNopNodes() override = default;
// Overrides the base-class pattern hook.
// NOTE(review): no definition of DefinePattern is visible alongside the
// Process definition -- confirm it is defined somewhere, otherwise linking
// will fail once the class is instantiated.
const BaseRef DefinePattern() const override;
// Overrides the base-class processing hook; see the class comment for the
// behavior of the definition.
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_REMOVE_NOP_NODES_H_
...@@ -21,7 +21,19 @@ ...@@ -21,7 +21,19 @@
#include "device/kernel_info.h" #include "device/kernel_info.h"
#include "pre_activate/common/optimizer.h" #include "pre_activate/common/optimizer.h"
#include "session/anf_runtime_algorithm.h" #include "session/anf_runtime_algorithm.h"
#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h" #include "pre_activate/ascend/buffer_fusion/ub_pattern_fusion.h"
#include "pre_activate/ascend/buffer_fusion/eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv2dbackprop_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv_single_in_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv_double_in_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/matmul_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/depthwiseconv_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/bnupdate_eltwise_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/conv_bnreduce_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/reduce_eltwise_fusion_pass.h"
#include "pre_activate/ascend/buffer_fusion/segment_eltwise_fusion_pass.h"
namespace mindspore { namespace mindspore {
namespace opt { namespace opt {
...@@ -79,10 +91,13 @@ TEST_F(TestHWBufferFusion, test_tbe_eltwise_fusion_1) { ...@@ -79,10 +91,13 @@ TEST_F(TestHWBufferFusion, test_tbe_eltwise_fusion_1) {
cast->set_kernel_info(std::make_shared<device::KernelInfo>()); cast->set_kernel_info(std::make_shared<device::KernelInfo>());
AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), cast.get()); AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), cast.get());
auto fusion_id_allocator = std::make_shared<FusionIdAllocator>();
MS_EXCEPTION_IF_NULL(fusion_id_allocator);
fusion_id_allocator->Init();
auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>(); auto pm = std::make_shared<opt::PassManager>();
auto buffer_fusion_pass = std::make_shared<opt::BufferFusion>(); pm->AddPass(std::make_shared<EltwiseFusionPass>(fusion_id_allocator));
pm->AddPass(buffer_fusion_pass); pm->AddPass(std::make_shared<UbPatternFusion>());
optimizer->AddPassManager(pm); optimizer->AddPassManager(pm);
FuncGraphPtr new_graph = optimizer->Optimize(kg); FuncGraphPtr new_graph = optimizer->Optimize(kg);
...@@ -168,10 +183,13 @@ TEST_F(TestHWBufferFusion, test_tbe_eltwise_fusion_2) { ...@@ -168,10 +183,13 @@ TEST_F(TestHWBufferFusion, test_tbe_eltwise_fusion_2) {
biasadd->set_kernel_info(std::make_shared<device::KernelInfo>()); biasadd->set_kernel_info(std::make_shared<device::KernelInfo>());
AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), biasadd.get()); AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), biasadd.get());
auto fusion_id_allocator = std::make_shared<FusionIdAllocator>();
MS_EXCEPTION_IF_NULL(fusion_id_allocator);
fusion_id_allocator->Init();
auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>(); auto pm = std::make_shared<opt::PassManager>();
auto buffer_fusion_pass = std::make_shared<opt::BufferFusion>(); pm->AddPass(std::make_shared<ReduceEltwiseFusionPass>(fusion_id_allocator));
pm->AddPass(buffer_fusion_pass); pm->AddPass(std::make_shared<UbPatternFusion>());
optimizer->AddPassManager(pm); optimizer->AddPassManager(pm);
FuncGraphPtr new_graph = optimizer->Optimize(kg); FuncGraphPtr new_graph = optimizer->Optimize(kg);
...@@ -255,10 +273,13 @@ TEST_F(TestHWBufferFusion, test_tbe_reduce_eltwise_fusion) { ...@@ -255,10 +273,13 @@ TEST_F(TestHWBufferFusion, test_tbe_reduce_eltwise_fusion) {
biasaddgrad->set_kernel_info(std::make_shared<device::KernelInfo>()); biasaddgrad->set_kernel_info(std::make_shared<device::KernelInfo>());
AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), biasaddgrad.get()); AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), biasaddgrad.get());
auto fusion_id_allocator = std::make_shared<FusionIdAllocator>();
MS_EXCEPTION_IF_NULL(fusion_id_allocator);
fusion_id_allocator->Init();
auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>(); auto pm = std::make_shared<opt::PassManager>();
auto buffer_fusion_pass = std::make_shared<opt::BufferFusion>(); pm->AddPass(std::make_shared<ReduceEltwiseFusionPass>(fusion_id_allocator));
pm->AddPass(buffer_fusion_pass); pm->AddPass(std::make_shared<UbPatternFusion>());
optimizer->AddPassManager(pm); optimizer->AddPassManager(pm);
FuncGraphPtr new_graph = optimizer->Optimize(kg); FuncGraphPtr new_graph = optimizer->Optimize(kg);
...@@ -321,10 +342,13 @@ TEST_F(TestHWBufferFusion, test_tbe_matmul_eltwise_fusion) { ...@@ -321,10 +342,13 @@ TEST_F(TestHWBufferFusion, test_tbe_matmul_eltwise_fusion) {
cast->set_kernel_info(std::make_shared<device::KernelInfo>()); cast->set_kernel_info(std::make_shared<device::KernelInfo>());
AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), cast.get()); AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), cast.get());
auto fusion_id_allocator = std::make_shared<FusionIdAllocator>();
MS_EXCEPTION_IF_NULL(fusion_id_allocator);
fusion_id_allocator->Init();
auto optimizer = std::make_shared<opt::GraphOptimizer>(); auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>(); auto pm = std::make_shared<opt::PassManager>();
auto buffer_fusion_pass = std::make_shared<opt::BufferFusion>(); pm->AddPass(std::make_shared<MatmulEltwiseFusionPass>(fusion_id_allocator));
pm->AddPass(buffer_fusion_pass); pm->AddPass(std::make_shared<UbPatternFusion>());
optimizer->AddPassManager(pm); optimizer->AddPassManager(pm);
FuncGraphPtr new_graph = optimizer->Optimize(kg); FuncGraphPtr new_graph = optimizer->Optimize(kg);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册