From ee5f3fa2406104cec0c90afc72d4fa7b2f923947 Mon Sep 17 00:00:00 2001 From: wenchunjiang Date: Thu, 16 Apr 2020 11:23:10 +0800 Subject: [PATCH] add pass to insert memcpy_async for get_next outputs --- .../pre_activate/ascend/ascend_helper.cc | 12 +++ .../ccsrc/pre_activate/ascend/ascend_helper.h | 2 + .../insert_memcpy_async_for_getnext.cc | 74 +++++++++++++++++++ .../insert_memcpy_async_for_getnext.h | 35 +++++++++ .../ascend/ir_fission/add_memcpy_async.cc | 13 +--- .../insert_memcpy_async_for_getnext.cc | 67 +++++++++++++++++ .../insert_memcpy_async_for_getnext.py | 55 ++++++++++++++ 7 files changed, 246 insertions(+), 12 deletions(-) create mode 100644 mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc create mode 100644 mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h create mode 100644 tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc create mode 100644 tests/ut/cpp/python_input/gtest_input/pre_activate/insert_memcpy_async_for_getnext.py diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc index 490a905a4..745ed4460 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.cc @@ -360,5 +360,17 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get()); return tuple_getitem; } + +AnfNodePtr CreateMemcpyAsyncOp(const FuncGraphPtr &graph, const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + auto prim = std::make_shared(kMemCpyAsyncOpName); + std::vector new_node_inputs = {NewValueNode(prim), node}; + auto new_node = graph->NewCNode(new_node_inputs); + MS_EXCEPTION_IF_NULL(new_node); + new_node->set_abstract(node->abstract()); + new_node->set_scope(node->scope()); + return new_node; +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h index 8925a52a7..7f5e86d72 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h +++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h @@ -65,6 +65,8 @@ AnfNodePtr InsertTransOpForOutput(const FuncGraphPtr &func_graph, const AnfNodeP CNodePtr InsertCastForInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode); AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx); + +AnfNodePtr CreateMemcpyAsyncOp(const FuncGraphPtr &graph, const AnfNodePtr &node); } // namespace opt } // namespace mindspore #endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ASCEND_HELPER_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc new file mode 100644 index 000000000..5065bab0f --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" +#include +#include +#include "pre_activate/ascend/ascend_helper.h" +#include "session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +AnfNodePtr InsertMemcpyAsyncForGetNextOutputs(const FuncGraphPtr &func_graph, const AnfNodePtr &node) { + if (func_graph == nullptr || node == nullptr) { + return nullptr; + } + + size_t output_num = AnfAlgo::GetOutputTensorNum(node); + if (output_num == 0) { + MS_LOG(DEBUG) << "Output number is zero, no need to insert memcpy_async!"; + return node; + } + + // getnext output is tuple and dynamic + std::vector make_tuple_inputs; + make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple)); + + for (size_t output_index = 0; output_index < output_num; ++output_index) { + auto tuple_get_item = CreatTupleGetItemNode(func_graph, node, output_index); + auto new_node = CreateMemcpyAsyncOp(func_graph, tuple_get_item); + if (new_node == nullptr) { + MS_LOG(EXCEPTION) << "Create memcpy_async op failed!"; + } + AnfAlgo::SetNodeAttr(kAttrLabelForInsertStreamActive, MakeValue(true), new_node); + make_tuple_inputs.push_back(new_node); + } + AnfNodePtr make_tuple = func_graph->NewCNode(make_tuple_inputs); + return make_tuple; +} + +const BaseRef InsertMemcpyAsyncForGetNext::DefinePattern() const { + std::shared_ptr Xs = std::make_shared(); + auto prim = std::make_shared(kGetNextOpName); + + return VectorRef({prim, Xs}); +} + +const AnfNodePtr InsertMemcpyAsyncForGetNext::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const EquivPtr &) const { + if (func_graph == nullptr || node == nullptr || !AnfAlgo::IsRealKernel(node)) { + return nullptr; + } + + if (AnfAlgo::HasNodeAttr(kAttrVisited, node)) { + MS_LOG(DEBUG) << "Node op_name[" << kGetNextOpName << "] has visited."; + return nullptr; + } + AnfAlgo::SetNodeAttr(kAttrVisited, MakeValue(true), node); + + return InsertMemcpyAsyncForGetNextOutputs(func_graph, node); +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h new file mode 100644 index 000000000..eb3b78d33 --- /dev/null +++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h @@ -0,0 +1,35 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ +#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ + +#include "pre_activate/common/optimizer.h" + +namespace mindspore { +namespace opt { +class InsertMemcpyAsyncForGetNext : public PatternProcessPass { + public: + explicit InsertMemcpyAsyncForGetNext(bool multigraph = true) + : PatternProcessPass("insert_memcpy_async_for_getnext", multigraph) {} + ~InsertMemcpyAsyncForGetNext() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_INSERT_MEMCPY_ASYNC_FOR_GETNEXT_H_ diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc index 2ab11b603..bbea94475 100644 --- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc +++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc @@ -18,22 +18,11 @@ #include "utils/utils.h" #include "session/anf_runtime_algorithm.h" #include "optimizer/opt.h" +#include "pre_activate/ascend/ascend_helper.h" namespace mindspore { namespace opt { namespace { -AnfNodePtr CreateMemcpyAsyncOp(const FuncGraphPtr &graph, const AnfNodePtr &node) { - MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(node); - auto prim = std::make_shared(kMemCpyAsyncOpName); - std::vector new_node_inputs = {NewValueNode(prim), node}; - auto new_node = graph->NewCNode(new_node_inputs); - MS_EXCEPTION_IF_NULL(new_node); - new_node->set_abstract(node->abstract()); - new_node->set_scope(node->scope()); - return new_node; -} - const AnfNodePtr AddMemcpyAsyncIfInputIsUsedByOthers(const FuncGraphPtr &graph, const CNodePtr &node) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(node); diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc new file mode 100644 index 000000000..2616354e4 --- /dev/null +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/backend_common_test.h" +#include "common/py_func_graph_fetcher.h" +#include "session/ascend_session.h" +#include "pipeline/resource.h" +#include "operator/ops.h" +#include "ir/manager.h" +#include "debug/anf_ir_dump.h" +#include "utils/utils.h" +#include "kernel/kernel_build_info.h" +#include "pre_activate/common/optimizer.h" +#include "pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.h" + +namespace mindspore { +namespace opt { +using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder; + +class TestHWInsertMemcpyAsyncForGetNext : public BackendCommon { + public: + TestHWInsertMemcpyAsyncForGetNext() : get_py_fun_("gtest_input.pre_activate.insert_memcpy_async_for_getnext", true) {} + ~TestHWInsertMemcpyAsyncForGetNext() override = default; + + public: + UT::PyFuncGraphFetcher get_py_fun_; +}; + +TEST_F(TestHWInsertMemcpyAsyncForGetNext, test_insert_memcpy_async_for_getnext_multi_output) { + FuncGraphPtr g_before = get_py_fun_.CallAndParseRet("test_insert_memcpy_async_for_getnext", "getnext_multi_output_before"); + + AbstractBasePtrList args_spec_list{}; + auto kernel_graph = GetKernelGraph(g_before, args_spec_list); + + KernelBuildInfoBuilder builder; + builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT}); + builder.SetOutputsDeviceType({kFloat32->type_id(), kInt32->type_id()}); + auto ret = kernel_graph->get_return(); + EXPECT_NE(ret->input(1), nullptr); + EXPECT_NE(ret->input(1)->cast()->input(1), nullptr); + auto get_next = ret->input(1)->cast()->input(1); + get_next->set_kernel_info(std::make_shared()); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), get_next.get()); + + auto optimizer = std::make_shared(); + auto pm = std::make_shared(); + pm->AddPass(std::make_shared()); + optimizer->AddPassManager(pm); + auto new_graph = optimizer->Optimize(kernel_graph); + + FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_insert_memcpy_async_for_getnext", "getnext_multi_output_after"); + EXPECT_TRUE(CheckEqualGraph(g_after, new_graph)); +} +} // namespace opt +} // namespace mindspore \ No newline at end of file diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/insert_memcpy_async_for_getnext.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/insert_memcpy_async_for_getnext.py new file mode 100644 index 000000000..902fd636d --- /dev/null +++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/insert_memcpy_async_for_getnext.py @@ -0,0 +1,55 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from mindspore.ops import operations as P +from mindspore.ops import Primitive +import mindspore as ms + +get_next = P.GetNext([ms.float32, ms.int32], [[32, 64], [32]], 2, "") +memcpy_async = Primitive('memcpy_async') +make_tuple = Primitive('make_tuple') +tuple_getitem = Primitive('tuple_getitem') + + +class FnDict: + def __init__(self): + self.fnDict = {} + + def __call__(self, fn): + self.fnDict[fn.__name__] = fn + + def __getitem__(self, name): + return self.fnDict[name] + + +def test_insert_memcpy_async_for_getnext(tag): + fns = FnDict() + + @fns + def getnext_multi_output_before(): + res = get_next() + return res + + @fns + def getnext_multi_output_after(): + res = get_next() + data = tuple_getitem(res, 0) + label = tuple_getitem(res, 1) + memcpy_async_data = memcpy_async(data) + memcpy_async_label = memcpy_async(label) + tuple = make_tuple(make_tuple(memcpy_async_data, memcpy_async_label)) + return tuple + + return fns[tag] -- GitLab