Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
58a70b5f
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
58a70b5f
编写于
4月 15, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
4月 15, 2020
浏览文件
操作
浏览文件
下载
差异文件
!346 getnext parallel optimization part II: Eliminate Memcpy in specify scenario
Merge pull request !346 from laiyongqiang/develop
上级
49b8a084
3e05f50f
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
333 addition
and
7 deletion
+333
-7
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
.../ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+6
-1
mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc
...re_activate/ascend/enhancer/getnext_memcpy_elimination.cc
+72
-0
mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h
...pre_activate/ascend/enhancer/getnext_memcpy_elimination.h
+33
-0
mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc
mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc
+1
-1
mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h
mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h
+3
-3
mindspore/ccsrc/session/gpu_session.cc
mindspore/ccsrc/session/gpu_session.cc
+1
-1
mindspore/ccsrc/utils/utils.h
mindspore/ccsrc/utils/utils.h
+1
-0
tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc
...re_activate/ascend/enhancer/getnext_memcpy_elimination.cc
+98
-0
tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
...pp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
+1
-1
tests/ut/cpp/python_input/gtest_input/pre_activate/getnext_memcpy_elimination_test.py
...est_input/pre_activate/getnext_memcpy_elimination_test.py
+117
-0
未找到文件。
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
浏览文件 @
58a70b5f
...
...
@@ -21,7 +21,7 @@
#include "pre_activate/ascend/ir_fission/bn_grad_split.h"
#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h"
#include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h"
#include "pre_activate/
common/ir_fusion
/allreduce_fusion.h"
#include "pre_activate/
pass
/allreduce_fusion.h"
#include "pre_activate/ascend/ir_fusion/square_sum_fusion.h"
#include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h"
#include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h"
...
...
@@ -58,8 +58,10 @@
#include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
#include "pre_activate/ascend/format_type/insert_cast_for_runop.h"
#include "pre_activate/ascend/format_type/insert_transdata_for_runop.h"
#include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h"
#include "pre_activate/ascend/ir_fission/addn_fission.h"
#include "utils/context/ms_context.h"
#include "utils/config_manager.h"
#include "debug/anf_ir_dump.h"
#include "debug/anf_ir_utils.h"
...
...
@@ -244,6 +246,9 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
other_pm
->
AddPass
(
std
::
make_shared
<
BufferFusion
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
GetitemTuple
>
());
other_pm
->
AddPass
(
std
::
make_shared
<
CommonSubexpressionElimination
>
());
if
(
context_ptr
->
enable_task_sink
()
&&
context_ptr
->
loop_sink_flag
()
&&
ConfigManager
::
GetInstance
().
iter_num
()
>
1
)
{
other_pm
->
AddPass
(
std
::
make_shared
<
GetnextMemcpyElimination
>
());
}
other_pm
->
AddPass
(
std
::
make_shared
<
CheckConsistency
>
());
optimizer
->
AddPassManager
(
other_pm
);
(
void
)
optimizer
->
Optimize
(
kernel_graph
);
...
...
mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc
0 → 100644
浏览文件 @
58a70b5f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h"
#include <memory>
#include "session/anf_runtime_algorithm.h"
#include "optimizer/opt.h"
namespace
mindspore
::
opt
{
const
BaseRef
GetnextMemcpyElimination
::
DefinePattern
()
const
{
auto
prim_memcpy
=
std
::
make_shared
<
Primitive
>
(
kMemCpyAsyncOpName
);
VarPtr
x
=
std
::
make_shared
<
SeqVar
>
();
VectorRef
memcpy_async
({
prim_memcpy
,
x
});
return
memcpy_async
;
}
const
AnfNodePtr
GetnextMemcpyElimination
::
Process
(
const
FuncGraphPtr
&
graph
,
const
AnfNodePtr
&
node
,
const
EquivPtr
&
equiv
)
const
{
if
(
graph
==
nullptr
||
node
==
nullptr
||
equiv
==
nullptr
)
{
return
nullptr
;
}
auto
memcpy_cnode
=
node
->
cast
<
CNodePtr
>
();
if
(
memcpy_cnode
==
nullptr
)
{
return
nullptr
;
}
// 1. memcpy has attr kAttrLabelForInsertStreamActive
if
(
!
AnfAlgo
::
HasNodeAttr
(
kAttrLabelForInsertStreamActive
,
node
))
{
MS_LOG
(
DEBUG
)
<<
"node has no label_for_insert_stream_active attr"
;
return
nullptr
;
}
// 2. memcpy's output has only one user next_node
auto
manager
=
graph
->
manager
();
MS_EXCEPTION_IF_NULL
(
manager
);
if
(
manager
->
node_users
().
find
(
memcpy_cnode
)
==
manager
->
node_users
().
end
())
{
MS_LOG
(
EXCEPTION
)
<<
"memcpy has no output in manager"
;
}
auto
next_nodes
=
manager
->
node_users
()[
memcpy_cnode
];
if
(
next_nodes
.
size
()
>
1
)
{
MS_LOG
(
DEBUG
)
<<
"node's output has more than one users"
;
return
nullptr
;
}
// 3. next_node has only one input which is memcpy's output
for
(
auto
&
item
:
next_nodes
)
{
auto
next_node
=
item
.
first
->
cast
<
CNodePtr
>
();
if
(
next_node
->
inputs
().
size
()
!=
2
)
{
MS_LOG
(
DEBUG
)
<<
"next node has more than one input"
;
return
nullptr
;
}
// add attr label_for_insert_stream_active for next_node
AnfAlgo
::
SetNodeAttr
(
kAttrLabelForInsertStreamActive
,
MakeValue
(
true
),
next_node
);
}
return
memcpy_cnode
->
input
(
1
);
}
}
// namespace mindspore::opt
mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h
0 → 100644
浏览文件 @
58a70b5f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H
#include "pre_activate/common/optimizer.h"
namespace
mindspore
{
namespace
opt
{
class
GetnextMemcpyElimination
:
public
PatternProcessPass
{
public:
explicit
GetnextMemcpyElimination
(
bool
multigraph
=
true
)
:
PatternProcessPass
(
"getnext_memcpy_elimination"
,
multigraph
)
{}
~
GetnextMemcpyElimination
()
override
=
default
;
const
BaseRef
DefinePattern
()
const
override
;
const
AnfNodePtr
Process
(
const
FuncGraphPtr
&
,
const
AnfNodePtr
&
,
const
EquivPtr
&
)
const
override
;
};
}
// namespace opt
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_GETNEXT_MEMCPY_ELIMINATION_H
mindspore/ccsrc/pre_activate/
common/ir_fusion
/allreduce_fusion.cc
→
mindspore/ccsrc/pre_activate/
pass
/allreduce_fusion.cc
浏览文件 @
58a70b5f
...
...
@@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "pre_activate/
common/ir_fusion
/allreduce_fusion.h"
#include "pre_activate/
pass
/allreduce_fusion.h"
#include <vector>
#include <string>
...
...
mindspore/ccsrc/pre_activate/
common/ir_fusion
/allreduce_fusion.h
→
mindspore/ccsrc/pre_activate/
pass
/allreduce_fusion.h
浏览文件 @
58a70b5f
...
...
@@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_
COMMON_IR_FUSION
_ALLREDUCE_FUSION_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_
COMMON_IR_FUSION
_ALLREDUCE_FUSION_H_
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_
PASS
_ALLREDUCE_FUSION_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_
PASS
_ALLREDUCE_FUSION_H_
#include <vector>
#include "pre_activate/common/pass.h"
...
...
@@ -46,4 +46,4 @@ class AllReduceFusion : public Pass {
};
}
// namespace opt
}
// namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_
COMMON_IR_FUSION
_ALLREDUCE_FUSION_H_
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_
PASS
_ALLREDUCE_FUSION_H_
mindspore/ccsrc/session/gpu_session.cc
浏览文件 @
58a70b5f
...
...
@@ -20,7 +20,7 @@
#include "device/gpu/gpu_stream_assign.h"
#include "pre_activate/common/optimizer.h"
#include "pre_activate/common/pass_manager.h"
#include "pre_activate/
common/ir_fusion
/allreduce_fusion.h"
#include "pre_activate/
pass
/allreduce_fusion.h"
#include "device/kernel_runtime_manager.h"
#include "predict/predict.h"
#include "common/utils.h"
...
...
mindspore/ccsrc/utils/utils.h
浏览文件 @
58a70b5f
...
...
@@ -148,6 +148,7 @@ constexpr auto kAttrSrcFormat = "src_format";
constexpr
auto
kAttrOutputUsedNum
=
"output_used_num"
;
constexpr
auto
kAttrHasBias
=
"has_bias"
;
constexpr
auto
kAttrN
=
"N"
;
constexpr
auto
kAttrLabelForInsertStreamActive
=
"label_for_insert_stream_active"
;
// attr value
constexpr
auto
kValueTargetSwitch
=
"target_switch"
;
...
...
tests/ut/cpp/pre_activate/ascend/enhancer/getnext_memcpy_elimination.cc
0 → 100644
浏览文件 @
58a70b5f
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/backend_common_test.h"
#include "common/py_func_graph_fetcher.h"
#include "session/anf_runtime_algorithm.h"
#include "operator/ops.h"
#include "ir/meta_tensor.h"
#include "debug/anf_ir_dump.h"
#include "utils/utils.h"
#include "kernel/kernel_build_info.h"
#include "pre_activate/common/optimizer.h"
#include "mindspore/ccsrc/pre_activate/ascend/enhancer/getnext_memcpy_elimination.h"
namespace
mindspore
{
namespace
opt
{
class
TestGetNextMemcpyElimination
:
public
BackendCommon
{
public:
TestGetNextMemcpyElimination
()
:
get_py_fun_
(
"gtest_input.pre_activate.getnext_memcpy_elimination_test"
,
true
)
{}
public:
UT
::
PyFuncGraphFetcher
get_py_fun_
;
};
TEST_F
(
TestGetNextMemcpyElimination
,
test_getnext_memcpy_elimination
)
{
FuncGraphPtr
g_before
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination"
,
"before"
);
ASSERT_TRUE
(
g_before
!=
nullptr
);
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
auto
pass
=
std
::
make_shared
<
opt
::
GetnextMemcpyElimination
>
();
pm
->
AddPass
(
pass
);
optimizer
->
AddPassManager
(
pm
);
auto
new_graph
=
optimizer
->
Optimize
(
g_before
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestGetNextMemcpyElimination
,
test_getnext_memcpy_elimination_no_attr
)
{
FuncGraphPtr
g_before
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_no_attr"
,
"before"
);
ASSERT_TRUE
(
g_before
!=
nullptr
);
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
auto
pass
=
std
::
make_shared
<
opt
::
GetnextMemcpyElimination
>
();
pm
->
AddPass
(
pass
);
optimizer
->
AddPassManager
(
pm
);
auto
new_graph
=
optimizer
->
Optimize
(
g_before
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_no_attr"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestGetNextMemcpyElimination
,
test_getnext_memcpy_elimination_memcpy_multi_users
)
{
FuncGraphPtr
g_before
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_memcpy_multi_users"
,
"before"
);
ASSERT_TRUE
(
g_before
!=
nullptr
);
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
auto
pass
=
std
::
make_shared
<
opt
::
GetnextMemcpyElimination
>
();
pm
->
AddPass
(
pass
);
optimizer
->
AddPassManager
(
pm
);
auto
new_graph
=
optimizer
->
Optimize
(
g_before
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_memcpy_multi_users"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
TEST_F
(
TestGetNextMemcpyElimination
,
test_getnext_memcpy_elimination_next_multi_inputs
)
{
FuncGraphPtr
g_before
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_next_multi_inputs"
,
"before"
);
ASSERT_TRUE
(
g_before
!=
nullptr
);
auto
optimizer
=
std
::
make_shared
<
opt
::
GraphOptimizer
>
();
auto
pm
=
std
::
make_shared
<
opt
::
PassManager
>
();
auto
pass
=
std
::
make_shared
<
opt
::
GetnextMemcpyElimination
>
();
pm
->
AddPass
(
pass
);
optimizer
->
AddPassManager
(
pm
);
auto
new_graph
=
optimizer
->
Optimize
(
g_before
);
FuncGraphPtr
g_after
=
get_py_fun_
.
CallAndParseRet
(
"test_getnext_memcpy_elimination_next_multi_inputs"
,
"after"
);
EXPECT_TRUE
(
CheckEqualGraph
(
g_after
,
new_graph
));
}
}
// namespace opt
}
// namespace mindspore
tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
浏览文件 @
58a70b5f
...
...
@@ -20,7 +20,7 @@
#include "ir/manager.h"
#include "debug/anf_ir_dump.h"
#include "session/anf_runtime_algorithm.h"
#include "pre_activate/
common/ir_fusion
/allreduce_fusion.h"
#include "pre_activate/
pass
/allreduce_fusion.h"
#include "pre_activate/common/optimizer.h"
#include "device/kernel_info.h"
#include "pre_activate/common/pass_manager.h"
...
...
tests/ut/cpp/python_input/gtest_input/pre_activate/getnext_memcpy_elimination_test.py
0 → 100644
浏览文件 @
58a70b5f
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from
mindspore.ops
import
operations
as
P
from
mindspore.ops
import
Primitive
import
mindspore
as
ms
get_next
=
P
.
GetNext
([
ms
.
float32
],
[[
1
,
64
,
112
,
112
]],
1
,
""
)
memcpy_async_attr
=
Primitive
(
'memcpy_async'
)
memcpy_async_attr
.
add_prim_attr
(
"label_for_insert_stream_active"
,
True
)
memcpy_async
=
Primitive
(
'memcpy_async'
)
cast
=
P
.
Cast
()
add
=
P
.
TensorAdd
()
class
FnDict
:
def
__init__
(
self
):
self
.
fnDict
=
{}
def
__call__
(
self
,
fn
):
self
.
fnDict
[
fn
.
__name__
]
=
fn
def
__getitem__
(
self
,
name
):
return
self
.
fnDict
[
name
]
def
test_getnext_memcpy_elimination
(
tag
):
fns
=
FnDict
()
@
fns
def
before
(
x
):
res
=
get_next
()
res
=
memcpy_async_attr
(
res
)
res
=
cast
(
res
)
return
res
@
fns
def
after
(
x
):
res
=
get_next
()
res
=
cast
(
res
)
return
res
return
fns
[
tag
]
def
test_getnext_memcpy_elimination_no_attr
(
tag
):
fns
=
FnDict
()
@
fns
def
before
(
x
):
res
=
get_next
()
res
=
memcpy_async
(
res
)
res
=
cast
(
res
)
return
res
@
fns
def
after
(
x
):
res
=
get_next
()
res
=
memcpy_async
(
res
)
res
=
cast
(
res
)
return
res
return
fns
[
tag
]
def
test_getnext_memcpy_elimination_memcpy_multi_users
(
tag
):
fns
=
FnDict
()
@
fns
def
before
(
x
):
res
=
get_next
()
memcpy_out
=
memcpy_async_attr
(
res
)
res
=
cast
(
memcpy_out
)
res
=
add
(
memcpy_out
,
res
)
return
res
@
fns
def
after
(
x
):
res
=
get_next
()
memcpy_out
=
memcpy_async_attr
(
res
)
res
=
cast
(
memcpy_out
)
res
=
add
(
memcpy_out
,
res
)
return
res
return
fns
[
tag
]
def
test_getnext_memcpy_elimination_next_multi_inputs
(
tag
):
fns
=
FnDict
()
@
fns
def
before
(
x
):
res
=
get_next
()
memcpy_out
=
memcpy_async_attr
(
res
)
res
=
add
(
memcpy_out
,
res
)
return
res
@
fns
def
after
(
x
):
res
=
get_next
()
memcpy_out
=
memcpy_async_attr
(
res
)
res
=
add
(
memcpy_out
,
res
)
return
res
return
fns
[
tag
]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录