diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 76301a8b47d38cb5a5bdb02bfe86f94b07cbb7df..10e5e12db5ed7f72690a3b63d41264f47813840d 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -145,7 +145,6 @@ void RunOpAscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_g
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto data_layout_pm = std::make_shared<PassManager>("pynative_transop_pm");
-  data_layout_pm->AddPass(std::make_shared<LayerNormGradSplit>());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
@@ -182,7 +181,6 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph)
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto data_layout_pm = std::make_shared<PassManager>("transop_pm");
-  data_layout_pm->AddPass(std::make_shared<LayerNormGradSplit>());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
   data_layout_pm->AddPass(std::make_shared());
@@ -238,6 +236,7 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr
     ir_fusion_pm->AddPass(std::make_shared());
   } else {
     ir_fusion_pm->AddPass(std::make_shared());
+    ir_fusion_pm->AddPass(std::make_shared<LayerNormGradSplit>());
     ir_fusion_pm->AddPass(std::make_shared());
     ir_fusion_pm->AddPass(std::make_shared());
     ir_fusion_pm->AddPass(std::make_shared());
@@ -282,6 +281,7 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm");
   ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared<LayerNormGradSplit>());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc
index cc1356c7247e1c3d02d36706618bd5f4e5bcdb92..1a25d836509c10d832afff34a8d4315255efea05 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.cc
@@ -32,7 +32,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormXBackprop(
   std::vector<AnfNodePtr> *layer_norm_x_backprop_outputs) const {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(layer_norm_grad);
-  MS_EXCEPTION_IF_NULL(kernel_select_);
   auto prim = std::make_shared<Primitive>(kLayerNormXBackpropOpName);
   std::vector<AnfNodePtr> layer_norm_x_backprop_inputs = {NewValueNode(prim)};
   for (size_t i = 1; i < layer_norm_grad->inputs().size(); ++i) {
@@ -46,7 +45,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormXBackprop(
   auto shapes = {AnfAlgo::GetOutputInferShape(layer_norm_grad, 0)};
   AnfAlgo::SetOutputInferTypeAndShape(types, shapes, layer_norm_x_backprop.get());
-  kernel_select_->SelectKernel(layer_norm_x_backprop);
   (*layer_norm_x_backprop_outputs).push_back(layer_norm_x_backprop);
 }
@@ -55,7 +53,6 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormBetaGammaBackprop(
   std::vector<AnfNodePtr> *layer_norm_beta_gamma_backprop_outputs) const {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(layer_norm_grad);
-  MS_EXCEPTION_IF_NULL(kernel_select_);
   auto prim = std::make_shared<Primitive>(kLayerNormBetaGammaBackpropOpName);
   std::vector<AnfNodePtr> layer_norm_beta_gamma_backprop_inputs = {NewValueNode(prim)};
   for (size_t i = 1; i < layer_norm_grad->inputs().size() - 1; ++i) {
@@ -73,10 +70,9 @@ void LayerNormGradSplit::CreateOutputsOfLayerNormBetaGammaBackprop(
   AnfAlgo::SetOutputInferTypeAndShape(types, shapes, layer_norm_beta_gamma_backprop.get());
   // get device shape of LayerNormGrad's 5th Input, and convert it to attr
-  std::vector<size_t> shape_gamma = AnfAlgo::GetInputDeviceShape(layer_norm_grad, 4);
+  std::vector<size_t> shape_gamma = AnfAlgo::GetPrevNodeOutputInferShape(layer_norm_grad, 4);
   AnfAlgo::SetNodeAttr(kAttrShapeGamma, MakeValue(opt::Convert2Int(shape_gamma)), layer_norm_beta_gamma_backprop);
-  kernel_select_->SelectKernel(layer_norm_beta_gamma_backprop);
   CreateMultipleOutputsOfAnfNode(graph, layer_norm_beta_gamma_backprop, kLayerNormBetaGammaBackpropOutputNum,
                                  layer_norm_beta_gamma_backprop_outputs);
 }
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h
index f25c2e98381bbfede2812f0c1cee247443cf0b2a..f442446b01a2bf7848912445eb4e6df207b21d62 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/layer_norm_grad_split.h
@@ -26,8 +26,7 @@ namespace mindspore {
 namespace opt {
 class LayerNormGradSplit : public PatternProcessPass {
  public:
-  explicit LayerNormGradSplit(bool multigraph = true)
-      : PatternProcessPass("layer_norm_grad_split", multigraph), kernel_select_(std::make_shared<KernelSelect>()) {}
+  explicit LayerNormGradSplit(bool multigraph = true) : PatternProcessPass("layer_norm_grad_split", multigraph) {}
   ~LayerNormGradSplit() override = default;
   const BaseRef DefinePattern() const override;
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
@@ -37,7 +36,6 @@ class LayerNormGradSplit : public PatternProcessPass {
                                           std::vector<AnfNodePtr> *layer_norm_grad_outputs) const;
   void CreateOutputsOfLayerNormBetaGammaBackprop(const FuncGraphPtr &graph, const CNodePtr &layer_norm_grad,
                                                  std::vector<AnfNodePtr> *layer_norm_beta_gamma_outputs) const;
-  KernelSelectPtr kernel_select_;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc
index 3ad13f7a64c524bc234a8c20678fe8f071a2d8a0..1df87960e393622045fb7a176c3e359382710214 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/layer_norm_grad_split_test.cc
@@ -39,36 +39,6 @@ class TestHWLayerNormGradSplit : public BackendCommon {
   UT::PyFuncGraphFetcher get_py_fun_;
 };
 
-class MockLayerNormGradSplitKernelSelect : public KernelSelect {
- public:
-  MockLayerNormGradSplitKernelSelect() = default;
-  ~MockLayerNormGradSplitKernelSelect() override = default;
-  void SelectKernel(const CNodePtr &cnode) override {
-    auto name = AnfAlgo::GetCNodeName(cnode);
-
-    if (name == kLayerNormXBackpropOpName) {
-      kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
-      builder.SetInputsFormat(
-        {kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
-      builder.SetInputsDeviceType(
-        {kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
-      builder.SetOutputsFormat({kOpFormat_NC1HWC0});
-      builder.SetOutputsDeviceType({kNumberTypeFloat16});
-      AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cnode.get());
-      return;
-    }
-    if (name == kLayerNormBetaGammaBackpropOpName) {
-      kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
-      builder.SetInputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
-      builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
-      builder.SetOutputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
-      builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16});
-      AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cnode.get());
-      return;
-    }
-  }
-};  // namespace opt
-
 TEST_F(TestHWLayerNormGradSplit, test_layer_norm_grad_split) {
   get_py_fun_.SetDoResolve(true);
   FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_layer_norm_grad_split", "before");
@@ -81,49 +51,9 @@ TEST_F(TestHWLayerNormGradSplit, test_layer_norm_grad_split) {
   auto kernel_graph = GetKernelGraph(g, args_spec_list);
   EXPECT_NE(kernel_graph, nullptr);
 
-  // get LayerNormGrad
-  CNodePtr ret = kernel_graph->get_return();
-  EXPECT_NE(ret, nullptr);
-  EXPECT_NE(ret->input(1), nullptr);
-  EXPECT_TRUE(ret->input(1)->isa<CNode>());
-  auto make_tuple1 = ret->input(1)->cast<CNodePtr>();
-  EXPECT_NE(make_tuple1->input(1), nullptr);
-  EXPECT_TRUE(make_tuple1->input(1)->isa<CNode>());
-  auto make_tuple2 = make_tuple1->input(1)->cast<CNodePtr>();
-  EXPECT_NE(make_tuple2->input(1), nullptr);
-  EXPECT_TRUE(make_tuple2->input(1)->isa<CNode>());
-  auto tuple_getitem = make_tuple2->input(1)->cast<CNodePtr>();
-  EXPECT_NE(tuple_getitem->input(1), nullptr);
-  EXPECT_TRUE(tuple_getitem->input(1)->isa<CNode>());
-  auto layer_norm_grad = tuple_getitem->input(1)->cast<CNodePtr>();
-
-  // set kernel for LayerNormGrad
-  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder1;
-  builder1.SetInputsFormat(
-    {kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
-  builder1.SetOutputsFormat({kOpFormat_NC1HWC0, kOpFormat_NC1HWC0, kOpFormat_NC1HWC0});
-  builder1.SetInputsDeviceType(
-    {kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
-  builder1.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16, kNumberTypeFloat16});
-  builder1.SetKernelType(TBE_KERNEL);
-  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), layer_norm_grad.get());
-
-  // get param5
-  EXPECT_NE(layer_norm_grad->input(5), nullptr);
-  auto param = layer_norm_grad->input(5);
-
-  // set kernel for param5
-  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder2;
-  builder2.SetOutputsFormat({kOpFormat_NC1HWC0});
-  builder2.SetOutputsDeviceType({kNumberTypeFloat16});
-  AnfAlgo::SetSelectKernelBuildInfo(builder2.Build(), param.get());
-
-  // do layer_norm_grad_split pass
   auto optimizer = std::make_shared<opt::GraphOptimizer>();
   auto pm = std::make_shared<opt::PassManager>();
   auto pass = std::make_shared<opt::LayerNormGradSplit>();
-  auto kernel_select = std::make_shared<MockLayerNormGradSplitKernelSelect>();
-  pass->kernel_select_ = kernel_select;
   pm->AddPass(pass);
   optimizer->AddPassManager(pm);
   auto new_graph = optimizer->Optimize(kernel_graph);