Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
66e7b02b
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
66e7b02b
编写于
8月 03, 2020
作者:
G
gukecai
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
independent stream parallel
上级
2d683234
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
369 addition
and
121 deletion
+369
-121
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
+26
-0
mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
+1
-0
mindspore/ccsrc/backend/session/session_basic.cc
mindspore/ccsrc/backend/session/session_basic.cc
+23
-11
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
...spore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
+195
-63
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
+9
-2
mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
+2
-2
mindspore/ccsrc/runtime/device/kernel_adjust.cc
mindspore/ccsrc/runtime/device/kernel_adjust.cc
+90
-39
mindspore/ccsrc/runtime/device/kernel_adjust.h
mindspore/ccsrc/runtime/device/kernel_adjust.h
+14
-4
mindspore/ccsrc/runtime/device/kernel_runtime.cc
mindspore/ccsrc/runtime/device/kernel_runtime.cc
+8
-0
mindspore/ccsrc/utils/utils.h
mindspore/ccsrc/utils/utils.h
+1
-0
未找到文件。
mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
浏览文件 @
66e7b02b
...
...
@@ -1163,5 +1163,31 @@ bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
auto
input
=
node
->
input
(
kAnfPrimitiveIndex
);
return
IsPrimitive
(
input
,
prim
::
kPrimLabelGoto
)
||
IsPrimitive
(
input
,
prim
::
kPrimLabelSwitch
);
}
bool
AnfRuntimeAlgorithm
::
IsIndependentNode
(
const
CNodePtr
&
node
)
{
MS_EXCEPTION_IF_NULL
(
node
);
if
(
AnfAlgo
::
GetKernelType
(
node
)
!=
AICPU_KERNEL
)
{
return
false
;
}
if
(
AnfAlgo
::
GetCNodeName
(
node
)
==
kGetNextOpName
)
{
MS_LOG
(
INFO
)
<<
"GetNext should not be independent node"
;
return
false
;
}
uint32_t
input_nums
=
AnfAlgo
::
GetInputTensorNum
(
node
);
if
(
input_nums
==
0
)
{
return
true
;
}
auto
inputs
=
node
->
inputs
();
for
(
size_t
i
=
1
;
i
<
inputs
.
size
();
i
++
)
{
if
(
!
inputs
[
i
]
->
isa
<
ValueNode
>
())
{
return
false
;
}
}
return
true
;
}
}
// namespace session
}
// namespace mindspore
mindspore/ccsrc/backend/session/anf_runtime_algorithm.h
浏览文件 @
66e7b02b
...
...
@@ -212,6 +212,7 @@ class AnfRuntimeAlgorithm {
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
static
TypeId
GetPrevNodeOutputPrecision
(
const
AnfNodePtr
&
node
,
size_t
input_idx
);
static
bool
IsCondControlKernel
(
const
CNodePtr
&
node
);
static
bool
IsIndependentNode
(
const
CNodePtr
&
node
);
};
}
// namespace session
using
AnfAlgo
=
session
::
AnfRuntimeAlgorithm
;
...
...
mindspore/ccsrc/backend/session/session_basic.cc
浏览文件 @
66e7b02b
...
...
@@ -177,20 +177,32 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto
if
(
inputs_params
==
nullptr
)
{
return
0
;
}
if
(
inputs_params
->
size
()
<
2
)
{
if
(
inputs_params
->
size
()
<
3
)
{
MS_LOG
(
EXCEPTION
)
<<
"Illegal inputs_params size"
;
}
auto
tensor
=
(
*
inputs_params
)[
0
];
MS_EXCEPTION_IF_NULL
(
tensor
);
auto
*
val
=
static_cast
<
int32_t
*>
(
tensor
->
data_c
());
MS_EXCEPTION_IF_NULL
(
val
);
*
val
=
0
;
tensor
->
set_dirty
(
true
);
// update current loop tensor to 0 per iterator
auto
cur_loop_tensor
=
(
*
inputs_params
)[
0
];
MS_EXCEPTION_IF_NULL
(
cur_loop_tensor
);
auto
*
cur_val
=
static_cast
<
int32_t
*>
(
cur_loop_tensor
->
data_c
());
MS_EXCEPTION_IF_NULL
(
cur_val
);
*
cur_val
=
0
;
cur_loop_tensor
->
set_dirty
(
true
);
// set loop_count to zero
MS_EXCEPTION_IF_NULL
(
inputs
);
inputs
->
push_back
(
tensor
);
inputs
->
push_back
(
cur_loop_
tensor
);
auto
epoch_tensor
=
(
*
inputs_params
)[
1
];
// update next loop tensor to 0 per iterator
auto
next_loop_tensor
=
(
*
inputs_params
)[
1
];
MS_EXCEPTION_IF_NULL
(
next_loop_tensor
);
auto
*
next_val
=
static_cast
<
int32_t
*>
(
next_loop_tensor
->
data_c
());
MS_EXCEPTION_IF_NULL
(
next_val
);
*
next_val
=
0
;
next_loop_tensor
->
set_dirty
(
true
);
// set loop_count to zero
MS_EXCEPTION_IF_NULL
(
inputs
);
inputs
->
push_back
(
next_loop_tensor
);
auto
epoch_tensor
=
(
*
inputs_params
)[
2
];
MS_EXCEPTION_IF_NULL
(
epoch_tensor
);
auto
*
epoch_val
=
static_cast
<
int32_t
*>
(
epoch_tensor
->
data_c
());
MS_EXCEPTION_IF_NULL
(
epoch_val
);
...
...
@@ -927,7 +939,7 @@ bool TensorNeedSync(const AnfNodePtr ¶meter, const tensor::TensorPtr &tensor
void
SessionBasic
::
LoadInputData
(
const
std
::
shared_ptr
<
KernelGraph
>
&
kernel_graph
,
const
std
::
vector
<
tensor
::
TensorPtr
>
&
inputs_const
)
const
{
std
::
vector
<
tensor
::
TensorPtr
>
inputs
(
inputs_const
);
size_t
input_ctrl_size
=
2
;
size_t
input_ctrl_size
=
3
;
MS_EXCEPTION_IF_NULL
(
kernel_graph
);
if
(
kernel_graph
->
input_ctrl_tensors
())
{
input_ctrl_size
=
LoadCtrlInputTensor
(
kernel_graph
,
&
inputs
);
...
...
@@ -937,7 +949,7 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
auto
params
=
AnfAlgo
::
GetAllOutput
(
input_node
);
std
::
copy
(
params
.
begin
(),
params
.
end
(),
std
::
back_inserter
(
input_nodes
));
}
if
((
inputs
.
size
()
+
input_ctrl_size
)
-
2
!=
input_nodes
.
size
())
{
if
((
inputs
.
size
()
+
input_ctrl_size
)
-
3
!=
input_nodes
.
size
())
{
MS_LOG
(
EXCEPTION
)
<<
"Tensor input:"
<<
inputs
.
size
()
<<
" is not equal graph inputs:"
<<
input_nodes
.
size
()
<<
", input_ctrl_size:"
<<
input_ctrl_size
;
}
...
...
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
浏览文件 @
66e7b02b
...
...
@@ -42,6 +42,9 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
InsertStreamActive
(
graph_ptr
);
InsertEventForHcomParallel
(
graph_ptr
);
InsertEventForIndependentParallel
(
graph_ptr
);
GetIndependentMaxTarget
(
graph_ptr
);
InsertCtrlForIndependentParallel
(
graph_ptr
);
GetNeedActiveStreams
(
graph_ptr
);
graph_ptr
->
PrintGraphExecuteOrder
();
CheckResourceAssign
(
graph_ptr
);
...
...
@@ -66,7 +69,7 @@ void AscendStreamAssign::ReorderIndependentOrders(const NotNull<KernelGraphPtr>
for
(
size_t
i
=
0
;
i
<
cnode_ptr_list
.
size
();
++
i
)
{
auto
cur_cnode_ptr
=
cnode_ptr_list
[
i
];
MS_EXCEPTION_IF_NULL
(
cur_cnode_ptr
);
if
(
IsIndependentNode
(
cur_cnode_ptr
))
{
if
(
AnfAlgo
::
IsIndependentNode
(
cur_cnode_ptr
))
{
independents
.
emplace_back
(
cur_cnode_ptr
);
}
else
{
others
.
emplace_back
(
cur_cnode_ptr
);
...
...
@@ -133,7 +136,7 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra
continue
;
}
if
(
IsIndependentNode
(
cur_cnode_ptr
))
{
if
(
AnfAlgo
::
IsIndependentNode
(
cur_cnode_ptr
))
{
exit_independent
=
true
;
continue
;
}
...
...
@@ -165,7 +168,7 @@ void AscendStreamAssign::AssignAllNodesStream(const NotNull<KernelGraphPtr> &gra
if
(
AnfAlgo
::
GetStreamId
(
cur_cnode_ptr
)
!=
kInvalidStreamId
)
{
continue
;
}
if
(
IsIndependentNode
(
cur_cnode_ptr
))
{
if
(
AnfAlgo
::
IsIndependentNode
(
cur_cnode_ptr
))
{
AssignIndependentStreamId
(
cur_cnode_ptr
);
}
}
...
...
@@ -242,33 +245,6 @@ void AscendStreamAssign::AssignIndependentStreamId(const CNodePtr &cur_cnode_ptr
}
}
bool
AscendStreamAssign
::
IsIndependentNode
(
const
CNodePtr
&
node_ptr
)
{
MS_EXCEPTION_IF_NULL
(
node_ptr
);
if
(
AnfAlgo
::
GetKernelType
(
node_ptr
)
!=
AICPU_KERNEL
)
{
return
false
;
}
if
(
AnfAlgo
::
GetCNodeName
(
node_ptr
)
==
kGetNextOpName
)
{
MS_LOG
(
INFO
)
<<
"GetNext should not be independent node"
;
return
false
;
}
uint32_t
input_nums
=
AnfAlgo
::
GetInputTensorNum
(
node_ptr
);
if
(
input_nums
==
0
)
{
MS_LOG
(
INFO
)
<<
"Node "
<<
node_ptr
->
fullname_with_scope
()
<<
" is independent, as inputs nums is zero"
;
return
true
;
}
auto
inputs
=
node_ptr
->
inputs
();
for
(
size_t
i
=
1
;
i
<
inputs
.
size
();
i
++
)
{
if
(
!
inputs
[
i
]
->
isa
<
ValueNode
>
())
{
return
false
;
}
}
MS_LOG
(
INFO
)
<<
"Node "
<<
node_ptr
->
fullname_with_scope
()
<<
" is independent, as inputs is all value node"
;
return
true
;
}
// section 3:
void
AscendStreamAssign
::
UpdateAtomicAddrCleanStreamId
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
MS_LOG
(
INFO
)
<<
"Start"
;
...
...
@@ -293,13 +269,11 @@ void AscendStreamAssign::InsertStreamActive(const NotNull<KernelGraphPtr> &graph
CNodePtr
pre_cnode_ptr
=
nullptr
;
uint32_t
pre_stream_id
=
UINT32_MAX
;
bool
independent_flag
=
!
(
independent_stream_map_
.
empty
());
bool
hcom_flag
=
!
(
hcom_stream_map_
.
empty
());
auto
cnode_ptr_list
=
graph_ptr
->
execution_order
();
for
(
size_t
i
=
0
;
i
<
cnode_ptr_list
.
size
();
++
i
)
{
cur_cnode_ptr
=
cnode_ptr_list
[
i
];
MS_EXCEPTION_IF_NULL
(
cur_cnode_ptr
);
if
(
IsIndependentNode
(
cur_cnode_ptr
))
{
if
(
AnfAlgo
::
IsIndependentNode
(
cur_cnode_ptr
))
{
update_cnode_list
.
emplace_back
(
cur_cnode_ptr
);
continue
;
}
...
...
@@ -322,7 +296,7 @@ void AscendStreamAssign::InsertStreamActive(const NotNull<KernelGraphPtr> &graph
update_cnode_list
.
emplace_back
(
active_ptr
);
}
if
(
(
independent_flag
||
hcom_flag
)
&&
(
AnfAlgo
::
GetCNodeName
(
cur_cnode_ptr
)
==
kStreamSwitchOpName
)
)
{
if
(
AnfAlgo
::
GetCNodeName
(
cur_cnode_ptr
)
==
kStreamSwitchOpName
)
{
MS_LOG
(
INFO
)
<<
"Insert StreamActive op after FP StreamSwitch for stream parallel"
;
UpdateStreamSwitch
(
graph_ptr
,
cur_cnode_ptr
,
&
update_cnode_list
);
}
else
{
...
...
@@ -346,8 +320,10 @@ void AscendStreamAssign::GetProcessedStream(const NotNull<KernelGraphPtr> &graph
uint32_t
cur_stream_id
=
AnfAlgo
::
GetStreamId
(
cur_cnode_ptr
);
if
(
AnfAlgo
::
GetCNodeName
(
cur_cnode_ptr
)
==
kStreamSwitchOpName
)
{
auto
true_stream_id
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
cur_cnode_ptr
,
kAttrTrueBranchStream
);
processed_streams_
.
emplace
(
true_stream_id
);
if
(
AnfAlgo
::
HasNodeAttr
(
kAttrTrueBranchStream
,
cur_cnode_ptr
))
{
auto
true_stream_id
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
cur_cnode_ptr
,
kAttrTrueBranchStream
);
processed_streams_
.
emplace
(
true_stream_id
);
}
if
(
!
AnfAlgo
::
HasNodeAttr
(
kStreamNeedActivedFirst
,
cur_cnode_ptr
))
{
continue
;
...
...
@@ -365,46 +341,78 @@ void AscendStreamAssign::GetProcessedStream(const NotNull<KernelGraphPtr> &graph
void
AscendStreamAssign
::
UpdateStreamSwitch
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
const
CNodePtr
&
switch_ptr
,
vector
<
CNodePtr
>
*
orders
)
{
orders
->
emplace_back
(
switch_ptr
);
if
(
!
AnfAlgo
::
HasNodeAttr
(
kStreamNeedActivedFirst
,
switch_ptr
))
{
orders
->
emplace_back
(
switch_ptr
);
return
;
}
auto
need_active
=
AnfAlgo
::
GetNodeAttr
<
bool
>
(
switch_ptr
,
kStreamNeedActivedFirst
);
if
(
!
need_active
)
{
orders
->
emplace_back
(
switch_ptr
);
return
;
}
MS_EXCEPTION_IF_NULL
(
switch_ptr
);
auto
true_stream_id
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
switch_ptr
,
kAttrTrueBranchStream
);
MS_LOG
(
INFO
)
<<
"Streamswtich stream id:"
<<
AnfAlgo
::
GetStreamId
(
switch_ptr
)
<<
"; active stream id:"
<<
true_stream_id
;
CNodePtr
active_ptr
=
KernelAdjust
::
GetInstance
().
CreateStreamActiveOp
(
graph_ptr
);
AnfAlgo
::
SetStreamId
(
true_stream_id
,
active_ptr
.
get
());
vector
<
uint32_t
>
active_ids
;
// active indepdent stream
for
(
const
auto
&
item
:
independent_stream_map_
)
{
active_ids
.
emplace_back
(
item
.
first
);
if
(
!
AnfAlgo
::
HasNodeAttr
(
kAttrStreamSwitchKind
,
switch_ptr
))
{
orders
->
emplace_back
(
switch_ptr
);
return
;
}
// active hcom stream
for
(
const
auto
&
item
:
hcom_stream_map_
)
{
active_ids
.
emplace_back
(
item
.
first
);
auto
kind
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
switch_ptr
,
kAttrStreamSwitchKind
);
if
(
kind
==
kEosStreamSwitch
||
kind
==
kGetNextStreamSwitch
)
{
orders
->
emplace_back
(
switch_ptr
);
return
;
}
AnfAlgo
::
SetNodeAttr
(
kAttrActiveStreamList
,
MakeValue
<
std
::
vector
<
uint32_t
>>
(
active_ids
),
active_ptr
);
// update processed stream
independent_stream_activated_
=
true
;
for
(
const
auto
&
item
:
independent_stream_map_
)
{
processed_streams_
.
emplace
(
item
.
first
);
}
if
(
kind
==
kIndependentStreamSwitch
)
{
bool
independent_empty
=
independent_stream_map_
.
empty
();
// if indepdent empty: delete independent streamswitch
if
(
!
independent_empty
)
{
for
(
const
auto
&
item
:
independent_stream_map_
)
{
// first independetn stream id is minimum and order by std map;
auto
first_independent_stream
=
item
.
first
;
AnfAlgo
::
SetNodeAttr
(
kAttrTrueBranchStream
,
MakeValue
<
uint32_t
>
(
first_independent_stream
),
switch_ptr
);
orders
->
emplace_back
(
switch_ptr
);
break
;
}
}
else
{
MS_LOG
(
ERROR
)
<<
"independent stream switch exit, but independent stream is empty"
;
}
hcom_stream_activated_
=
true
;
for
(
const
auto
&
item
:
hcom_stream_map_
)
{
processed_streams_
.
emplace
(
item
.
first
);
// update processed stream
independent_stream_activated_
=
true
;
for
(
const
auto
&
item
:
independent_stream_map_
)
{
processed_streams_
.
emplace
(
item
.
first
);
}
return
;
}
orders
->
emplace_back
(
active_ptr
);
if
(
kind
==
kFpBpStreamSwitch
)
{
bool
hcom_empty
=
hcom_stream_map_
.
empty
();
if
(
hcom_empty
)
{
orders
->
emplace_back
(
switch_ptr
);
return
;
}
if
(
!
AnfAlgo
::
HasNodeAttr
(
kAttrTrueBranchStream
,
switch_ptr
))
{
orders
->
emplace_back
(
switch_ptr
);
MS_LOG
(
WARNING
)
<<
"FpBp StreamSwitch has no true branch attr"
;
return
;
}
auto
true_stream_id
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
switch_ptr
,
kAttrTrueBranchStream
);
MS_LOG
(
INFO
)
<<
"Streamswtich stream id:"
<<
AnfAlgo
::
GetStreamId
(
switch_ptr
)
<<
"; active stream id:"
<<
true_stream_id
;
CNodePtr
active_ptr
=
KernelAdjust
::
GetInstance
().
CreateStreamActiveOp
(
graph_ptr
);
AnfAlgo
::
SetStreamId
(
true_stream_id
,
active_ptr
.
get
());
vector
<
uint32_t
>
active_ids
;
// active hcom stream
for
(
const
auto
&
item
:
hcom_stream_map_
)
{
active_ids
.
emplace_back
(
item
.
first
);
}
AnfAlgo
::
SetNodeAttr
(
kAttrActiveStreamList
,
MakeValue
<
std
::
vector
<
uint32_t
>>
(
active_ids
),
active_ptr
);
hcom_stream_activated_
=
true
;
for
(
const
auto
&
item
:
hcom_stream_map_
)
{
processed_streams_
.
emplace
(
item
.
first
);
}
orders
->
emplace_back
(
switch_ptr
);
orders
->
emplace_back
(
active_ptr
);
}
}
bool
AscendStreamAssign
::
IsProcessedStream
(
uint32_t
stream_id
)
{
...
...
@@ -632,7 +640,7 @@ void AscendStreamAssign::InsertEventForIndependentParallel(const NotNull<KernelG
auto
it
=
cnodes
.
begin
();
while
(
it
!=
cnodes
.
end
())
{
MS_EXCEPTION_IF_NULL
(
*
it
);
if
(
IsIndependentNode
(
*
it
))
{
if
(
AnfAlgo
::
IsIndependentNode
(
*
it
))
{
MS_LOG
(
INFO
)
<<
"Deal independent op["
<<
(
*
it
)
->
DebugString
()
<<
"]"
;
CNodePtr
send_cnode_ptr
=
CreateSendApplyKernel
(
graph_ptr
,
cur_event_id
,
AnfAlgo
::
GetStreamId
(
*
it
));
it
=
cnodes
.
insert
(
it
+
1
,
send_cnode_ptr
);
...
...
@@ -660,6 +668,129 @@ void AscendStreamAssign::InsertEventForIndependentParallel(const NotNull<KernelG
MS_LOG
(
INFO
)
<<
"End"
;
}
void
AscendStreamAssign
::
GetIndependentMaxTarget
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
MS_LOG
(
INFO
)
<<
"Start"
;
auto
cnode_ptr_list
=
graph_ptr
->
execution_order
();
for
(
size_t
i
=
0
;
i
<
cnode_ptr_list
.
size
();
i
++
)
{
auto
cur_node
=
cnode_ptr_list
[
i
];
auto
key
=
cur_node
.
get
();
if
(
!
AnfAlgo
::
IsIndependentNode
(
cur_node
))
{
continue
;
}
bool
flag
=
false
;
for
(
size_t
j
=
cnode_ptr_list
.
size
()
-
1
;
j
>
i
;
j
--
)
{
auto
target_node
=
cnode_ptr_list
[
j
];
auto
inputs
=
target_node
->
inputs
();
for
(
size_t
m
=
1
;
m
<
inputs
.
size
();
m
++
)
{
auto
input
=
inputs
[
m
];
if
(
opt
::
IsNopNode
(
input
))
{
CNodePtr
cnode
=
input
->
cast
<
CNodePtr
>
();
auto
new_inputs
=
cnode
->
inputs
();
for
(
size_t
k
=
1
;
k
<
new_inputs
.
size
();
k
++
)
{
auto
new_real_input
=
AnfAlgo
::
VisitKernel
(
new_inputs
[
k
],
0
);
if
(
key
==
new_real_input
.
first
.
get
())
{
MS_LOG
(
INFO
)
<<
"Nop node find max target op:"
<<
AnfAlgo
::
GetCNodeName
(
cur_node
);
independent_targets_
.
emplace
(
target_node
.
get
());
flag
=
true
;
break
;
}
}
}
else
{
auto
real_input
=
AnfAlgo
::
VisitKernel
(
input
,
0
);
if
(
key
==
real_input
.
first
.
get
())
{
MS_LOG
(
INFO
)
<<
"Find max target op:"
<<
AnfAlgo
::
GetCNodeName
(
cur_node
);
independent_targets_
.
emplace
(
target_node
.
get
());
flag
=
true
;
}
}
if
(
flag
)
{
break
;
}
}
}
}
MS_LOG
(
INFO
)
<<
"End"
;
}
uint32_t
AscendStreamAssign
::
GetIndexByKey
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
const
CNodeKey
&
key
)
{
auto
&
exe_orders
=
graph_ptr
->
execution_order
();
for
(
uint32_t
i
=
0
;
i
<
exe_orders
.
size
();
i
++
)
{
CNodeKey
node_key
=
exe_orders
[
i
].
get
();
if
(
node_key
==
key
)
{
return
i
;
}
}
return
UINT32_MAX
;
}
uint32_t
AscendStreamAssign
::
GetMaxIndexTarget
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
if
(
independent_targets_
.
empty
())
{
return
UINT32_MAX
;
}
std
::
set
<
uint32_t
>
indexs
;
for
(
const
auto
&
key
:
independent_targets_
)
{
auto
index
=
GetIndexByKey
(
graph_ptr
,
key
);
if
(
index
==
UINT32_MAX
)
{
MS_LOG
(
EXCEPTION
)
<<
"graph has no correspond key"
;
}
indexs
.
emplace
(
index
);
}
return
*
(
std
::
max_element
(
indexs
.
begin
(),
indexs
.
end
()));
}
uint32_t
AscendStreamAssign
::
GetIndependentStreamSwitchStreamId
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
auto
&
exe_orders
=
graph_ptr
->
execution_order
();
for
(
const
auto
&
item
:
exe_orders
)
{
if
(
AnfAlgo
::
GetCNodeName
(
item
)
==
kStreamSwitchOpName
)
{
if
(
!
AnfAlgo
::
HasNodeAttr
(
kAttrStreamSwitchKind
,
item
))
{
continue
;
}
auto
kind
=
AnfAlgo
::
GetNodeAttr
<
uint32_t
>
(
item
,
kAttrStreamSwitchKind
);
if
(
kind
==
kIndependentStreamSwitch
)
{
return
AnfAlgo
::
GetStreamId
(
item
);
}
}
}
return
kInvalidStreamId
;
}
void
AscendStreamAssign
::
InsertCtrlForIndependentParallel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
if
(
independent_targets_
.
empty
())
{
return
;
}
uint32_t
independent_switch_stream
=
GetIndependentStreamSwitchStreamId
(
graph_ptr
);
if
(
independent_switch_stream
==
kInvalidStreamId
)
{
return
;
}
auto
max_index
=
GetMaxIndexTarget
(
graph_ptr
);
auto
&
exe_orders
=
graph_ptr
->
execution_order
();
if
(
max_index
>=
exe_orders
.
size
())
{
MS_LOG
(
EXCEPTION
)
<<
"max target index:"
<<
max_index
<<
" is greater than graph orders size:"
<<
exe_orders
.
size
();
}
auto
max_node_stream
=
AnfAlgo
::
GetStreamId
(
exe_orders
[
max_index
]);
CNodePtr
active_ptr
=
KernelAdjust
::
GetInstance
().
CreateStreamActiveOp
(
graph_ptr
);
// 1.set stream id
AnfAlgo
::
SetStreamId
(
max_node_stream
,
active_ptr
.
get
());
// 2.set active stream ids
std
::
vector
<
uint32_t
>
active_index_list
{
independent_switch_stream
};
AnfAlgo
::
SetNodeAttr
(
kAttrActiveStreamList
,
MakeValue
<
std
::
vector
<
uint32_t
>>
(
active_index_list
),
active_ptr
);
std
::
vector
<
CNodePtr
>
update_cnode_list
;
std
::
copy
(
exe_orders
.
begin
(),
exe_orders
.
begin
()
+
max_index
+
1
,
std
::
back_inserter
(
update_cnode_list
));
update_cnode_list
.
emplace_back
(
active_ptr
);
std
::
copy
(
exe_orders
.
begin
()
+
max_index
+
1
,
exe_orders
.
end
(),
std
::
back_inserter
(
update_cnode_list
));
graph_ptr
->
set_execution_order
(
update_cnode_list
);
}
// section7
void
AscendStreamAssign
::
GetNeedActiveStreams
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
)
{
CNodePtr
cur_cnode_ptr
=
nullptr
;
...
...
@@ -917,6 +1048,7 @@ void AscendStreamAssign::Reset() {
stream_groups_
.
clear
();
stream_relations_
.
clear
();
event_map_
.
clear
();
independent_targets_
.
clear
();
}
// section 10
...
...
mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
浏览文件 @
66e7b02b
...
...
@@ -39,6 +39,7 @@ using std::shared_ptr;
using
std
::
unordered_map
;
using
std
::
unordered_set
;
using
std
::
vector
;
using
CNodeKey
=
void
*
;
const
uint32_t
kInvalidStreamId
=
UINT32_MAX
;
const
uint32_t
kInvalidEventId
=
UINT32_MAX
;
class
AscendResourceMng
{
...
...
@@ -108,8 +109,6 @@ class AscendStreamAssign {
void
AssignStream
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
GetHcomStreams
(
std
::
vector
<
uint32_t
>
*
streams
);
void
GetWaitStreams
(
vector
<
uint32_t
>
*
wait_active_stream_list
);
CNodePtr
CreateSendApplyKernel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
uint32_t
event_id
,
uint32_t
stream_id
);
CNodePtr
CreateRecvApplyKernel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
uint32_t
event_id
,
uint32_t
stream_id
);
const
std
::
vector
<
std
::
vector
<
uint32_t
>>
&
get_stream_group
()
const
{
return
stream_groups_
;
}
const
std
::
map
<
CNodePtr
,
CNodePtr
>
&
get_event_map
()
const
{
return
event_map_
;
}
...
...
@@ -117,6 +116,8 @@ class AscendStreamAssign {
AscendStreamAssign
()
=
default
;
~
AscendStreamAssign
()
=
default
;
void
Reset
();
CNodePtr
CreateSendApplyKernel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
uint32_t
event_id
,
uint32_t
stream_id
);
CNodePtr
CreateRecvApplyKernel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
uint32_t
event_id
,
uint32_t
stream_id
);
void
CheckResourceAssign
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
CheckStreamAssign
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
CheckEventAssign
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
...
...
@@ -130,6 +131,7 @@ class AscendStreamAssign {
void
UpdateStreamSwitch
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
const
CNodePtr
&
switch_ptr
,
vector
<
CNodePtr
>
*
orders
);
void
InsertEventForIndependentParallel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
InsertCtrlForIndependentParallel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
InsertEventForHcomParallel
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
InsertEventCommonDependHcom
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
InsertEventHcomDependCommon
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
...
...
@@ -141,6 +143,10 @@ class AscendStreamAssign {
void
GetProcessedStream
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
GetNeedActiveStreams
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
ReorderIndependentOrders
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
uint32_t
GetMaxIndexTarget
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
uint32_t
GetIndexByKey
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
,
const
CNodeKey
&
key
);
uint32_t
GetIndependentStreamSwitchStreamId
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
void
GetIndependentMaxTarget
(
const
NotNull
<
KernelGraphPtr
>
&
graph_ptr
);
bool
IsTaskSink
();
bool
IsHcom
(
const
CNodePtr
&
cur_cnode_ptr
);
...
...
@@ -171,6 +177,7 @@ class AscendStreamAssign {
std
::
map
<
uint32_t
,
uint32_t
>
common_stream_map_
{};
std
::
set
<
uint32_t
>
processed_streams_
{};
std
::
vector
<
uint32_t
>
need_first_active_streams_
{};
std
::
set
<
CNodeKey
>
independent_targets_
;
// attr for memory copy reuse
std
::
map
<
uint32_t
,
std
::
vector
<
uint32_t
>>
stream_relations_
{};
...
...
mindspore/ccsrc/runtime/device/ascend/dump/data_dumper.cc
浏览文件 @
66e7b02b
...
...
@@ -37,8 +37,8 @@ static constexpr uint32_t kTupleTaskId = 0;
static
constexpr
uint32_t
kTupleStreamId
=
1
;
static
constexpr
uint32_t
kTupleArgs
=
2
;
static
constexpr
uint32_t
kCurrentStepTensorIndex
=
0
;
static
constexpr
uint32_t
kCurrentEpochTensorIndex
=
1
;
static
constexpr
uint32_t
kStepsPerEpochTensorIndex
=
2
;
static
constexpr
uint32_t
kCurrentEpochTensorIndex
=
2
;
static
constexpr
uint32_t
kStepsPerEpochTensorIndex
=
3
;
static
constexpr
uint64_t
kOpDebugShape
=
2048
;
static
constexpr
uint64_t
kOpDebugHostMemSize
=
2048
;
static
constexpr
uint64_t
kOpDebugDevMemSize
=
sizeof
(
void
*
);
...
...
mindspore/ccsrc/runtime/device/kernel_adjust.cc
浏览文件 @
66e7b02b
...
...
@@ -103,6 +103,19 @@ CNodePtr KernelAdjust::CreateRecvApplyKernel(const std::shared_ptr<session::Kern
return
recv_node_ptr
;
}
bool
KernelAdjust
::
ExitIndependent
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
const
auto
&
exe_orders
=
kernel_graph_ptr
->
execution_order
();
for
(
const
auto
&
node
:
exe_orders
)
{
if
(
AnfAlgo
::
IsIndependentNode
(
node
))
{
MS_LOG
(
INFO
)
<<
"graph exit independent node"
;
return
true
;
}
}
return
false
;
}
void
KernelAdjust
::
InsertSwitchLoop
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
)
{
device
::
ascend
::
AscendResourceMng
&
resource_manager
=
device
::
ascend
::
AscendResourceMng
::
GetInstance
();
resource_manager
.
ResetResource
();
...
...
@@ -117,10 +130,10 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
std
::
vector
<
AnfNodePtr
>
*
mute_inputs
=
kernel_graph_ptr
->
MutableInputs
();
MS_EXCEPTION_IF_NULL
(
mute_inputs
);
mute_inputs
->
push_back
(
switch_loop_input
[
kLoopCountParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kCurLoopCountParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kNextLoopCountParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kEpochParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kIterLoopParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kZeroParamName
]);
mute_inputs
->
push_back
(
switch_loop_input
[
kOneParamName
]);
for
(
const
auto
&
input
:
kernel_graph_ptr
->
inputs
())
{
MS_EXCEPTION_IF_NULL
(
input
);
...
...
@@ -145,7 +158,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
// getnext loop process
// getnext loop stream switch op
CNodePtr
getnext_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
);
CNodePtr
getnext_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
,
kGetNextStreamSwitch
);
MS_EXCEPTION_IF_NULL
(
getnext_switch_app
);
uint32_t
getnext_switch_stream_id
=
resource_manager
.
ApplyNewStream
();
AnfAlgo
::
SetStreamId
(
getnext_switch_stream_id
,
getnext_switch_app
.
get
());
...
...
@@ -165,7 +178,9 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
}
// update getnext loop stream switch true_branch_stream attr
AnfAlgo
::
SetNodeAttr
(
kStreamNeedActivedFirst
,
MakeValue
<
bool
>
(
true
),
getnext_switch_app
);
AnfAlgo
::
SetNodeAttr
(
kAttrTrueBranchStream
,
MakeValue
<
uint32_t
>
(
getnext_stream_id
),
getnext_switch_app
);
AnfAlgo
::
SetNodeAttr
(
kAttrStreamSwitchKind
,
MakeValue
<
uint32_t
>
(
kGetNextStreamSwitch
),
getnext_switch_app
);
// getnext loop fpbp start send
uint32_t
fpbp_start_event_id
=
resource_manager
.
ApplyNewEvent
();
...
...
@@ -182,7 +197,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
// End Of Sequence loop process
// eos loop stream switch
CNodePtr
eos_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
);
CNodePtr
eos_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
,
kEosStreamSwitch
);
MS_EXCEPTION_IF_NULL
(
eos_switch_app
);
uint32_t
eos_switch_stream_id
=
resource_manager
.
ApplyNewStream
();
AnfAlgo
::
SetStreamId
(
eos_switch_stream_id
,
eos_switch_app
.
get
());
...
...
@@ -197,6 +212,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
// update eos loop stream switch true_branch_stream attr
AnfAlgo
::
SetNodeAttr
(
kAttrTrueBranchStream
,
MakeValue
<
uint32_t
>
(
eos_stream_id
),
eos_switch_app
);
AnfAlgo
::
SetNodeAttr
(
kAttrStreamSwitchKind
,
MakeValue
<
uint32_t
>
(
kEosStreamSwitch
),
eos_switch_app
);
// EndOfSequence op
CNodePtr
end_of_sequence_op
=
CreateEndOfSequenceOP
(
kernel_graph_ptr
,
getnext_cnode
);
...
...
@@ -214,13 +230,27 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
fpbp_active_streams
.
push_back
(
eos_switch_stream_id
);
}
bool
exit_independent
=
ExitIndependent
(
kernel_graph_ptr
);
if
(
exit_independent
)
{
// Independet parallel
CNodePtr
independent_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
,
kIndependentStreamSwitch
);
MS_EXCEPTION_IF_NULL
(
independent_switch_app
);
uint32_t
independent_switch_stream_id
=
resource_manager
.
ApplyNewStream
();
AnfAlgo
::
SetStreamId
(
independent_switch_stream_id
,
independent_switch_app
.
get
());
AnfAlgo
::
SetNodeAttr
(
kStreamNeedActivedFirst
,
MakeValue
<
bool
>
(
true
),
independent_switch_app
);
AnfAlgo
::
SetNodeAttr
(
kAttrStreamSwitchKind
,
MakeValue
<
uint32_t
>
(
kIndependentStreamSwitch
),
independent_switch_app
);
exec_order
.
push_back
(
independent_switch_app
);
}
// fpbp loop process
// fpbp loop stream switch
CNodePtr
fpbp_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
);
CNodePtr
fpbp_switch_app
=
CreateStreamSwitchOp
(
kernel_graph_ptr
,
switch_loop_input
,
kFpBpStreamSwitch
);
MS_EXCEPTION_IF_NULL
(
fpbp_switch_app
);
uint32_t
fpbp_switch_stream_id
=
resource_manager
.
ApplyNewStream
();
AnfAlgo
::
SetStreamId
(
fpbp_switch_stream_id
,
fpbp_switch_app
.
get
());
AnfAlgo
::
SetNodeAttr
(
kStreamNeedActivedFirst
,
MakeValue
<
bool
>
(
true
),
fpbp_switch_app
);
exec_order
.
push_back
(
fpbp_switch_app
);
// fpbp loop fpbp start recv
...
...
@@ -231,9 +261,9 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
// update fpbp loop stream switch true_branch_stream attr
AnfAlgo
::
SetNodeAttr
(
kAttrTrueBranchStream
,
MakeValue
<
uint32_t
>
(
fpbp_stream_id
),
fpbp_switch_app
);
//
fpbp
loop AssignAdd
CNodePtr
assign_add_one
=
CreateStreamAssignAddnOP
(
kernel_graph_ptr
,
switch_loop_input
);
AnfAlgo
::
SetNodeAttr
(
kAttrStreamSwitchKind
,
MakeValue
<
uint32_t
>
(
kFpBpStreamSwitch
),
fpbp_switch_app
);
//
next
loop AssignAdd
CNodePtr
assign_add_one
=
CreateStreamAssignAddnOP
(
kernel_graph_ptr
,
switch_loop_input
,
false
);
MS_EXCEPTION_IF_NULL
(
assign_add_one
);
AnfAlgo
::
SetStreamId
(
fpbp_stream_id
,
assign_add_one
.
get
());
exec_order
.
push_back
(
assign_add_one
);
...
...
@@ -271,6 +301,11 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
// fpbp loop other ops
(
void
)
std
::
copy
(
other_list
.
begin
(),
other_list
.
end
(),
std
::
back_inserter
(
exec_order
));
// current assign add op
CNodePtr
cur_assign_add
=
CreateStreamAssignAddnOP
(
kernel_graph_ptr
,
switch_loop_input
,
true
);
MS_EXCEPTION_IF_NULL
(
cur_assign_add
);
exec_order
.
push_back
(
cur_assign_add
);
// stream active to activate fpbp loop and eos loop
CNodePtr
fpbp_active_app
=
CreateStreamActiveOp
(
kernel_graph_ptr
);
MS_EXCEPTION_IF_NULL
(
fpbp_active_app
);
...
...
@@ -293,13 +328,19 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne
MS_LOG
(
EXCEPTION
)
<<
"create abstract before insert switch op failed!"
;
}
ParameterPtr
loop_count
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
MS_EXCEPTION_IF_NULL
(
loop_count
);
loop_count
->
set_name
(
kLoopCountParamName
);
loop_count
->
set_abstract
(
paremeter_abstract_ptr
);
ParameterPtr
loop_count_new
=
kernel_graph_ptr
->
NewParameter
(
loop_count
);
ParameterPtr
cur_loop_count
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
MS_EXCEPTION_IF_NULL
(
cur_loop_count
);
cur_loop_count
->
set_name
(
kCurLoopCountParamName
);
cur_loop_count
->
set_abstract
(
paremeter_abstract_ptr
);
ParameterPtr
loop_count_cur
=
kernel_graph_ptr
->
NewParameter
(
cur_loop_count
);
(
*
switch_loop_input
)[
kCurLoopCountParamName
]
=
loop_count_cur
;
(
*
switch_loop_input
)[
kLoopCountParamName
]
=
loop_count_new
;
ParameterPtr
next_loop_count
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
MS_EXCEPTION_IF_NULL
(
next_loop_count
);
next_loop_count
->
set_name
(
kNextLoopCountParamName
);
next_loop_count
->
set_abstract
(
paremeter_abstract_ptr
);
ParameterPtr
loop_count_next
=
kernel_graph_ptr
->
NewParameter
(
next_loop_count
);
(
*
switch_loop_input
)[
kNextLoopCountParamName
]
=
loop_count_next
;
ParameterPtr
iter_loop
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
iter_loop
->
set_name
(
kIterLoopParamName
);
...
...
@@ -307,12 +348,6 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne
ParameterPtr
iter_loop_new
=
kernel_graph_ptr
->
NewParameter
(
iter_loop
);
(
*
switch_loop_input
)[
kIterLoopParamName
]
=
iter_loop_new
;
ParameterPtr
zero
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
zero
->
set_name
(
kZeroParamName
);
zero
->
set_abstract
(
paremeter_abstract_ptr
);
ParameterPtr
zero_new
=
kernel_graph_ptr
->
NewParameter
(
zero
);
(
*
switch_loop_input
)[
kZeroParamName
]
=
zero_new
;
ParameterPtr
one
=
std
::
make_shared
<
Parameter
>
(
kernel_graph_ptr
);
one
->
set_name
(
kOneParamName
);
one
->
set_abstract
(
paremeter_abstract_ptr
);
...
...
@@ -340,14 +375,22 @@ kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBui
}
CNodePtr
KernelAdjust
::
CreateStreamSwitchOp
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
)
{
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
,
StreamSwitchKind
kind
)
{
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
selected_kernel_builder
=
CreateMngKernelBuilder
(
{
kOpFormat_DEFAULT
,
kOpFormat_DEFAULT
},
{
TypeId
::
kNumberTypeInt32
,
TypeId
::
kNumberTypeInt32
});
auto
typeNone_abstract
=
std
::
make_shared
<
abstract
::
AbstractNone
>
();
auto
stream_switch
=
std
::
make_shared
<
Primitive
>
(
kStreamSwitchOpName
);
std
::
vector
<
AnfNodePtr
>
inputs
;
inputs
.
push_back
(
NewValueNode
(
stream_switch
));
inputs
.
push_back
(
switch_loop_input
.
at
(
kLoopCountParamName
));
if
(
kind
==
kFpBpStreamSwitch
||
kind
==
kEosStreamSwitch
)
{
inputs
.
push_back
(
switch_loop_input
.
at
(
kNextLoopCountParamName
));
}
else
if
(
kind
==
kGetNextStreamSwitch
||
kind
==
kIndependentStreamSwitch
)
{
inputs
.
push_back
(
switch_loop_input
.
at
(
kNextLoopCountParamName
));
}
else
{
MS_LOG
(
ERROR
)
<<
"unknown stream switch kind"
;
}
inputs
.
push_back
(
switch_loop_input
.
at
(
kIterLoopParamName
));
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
CNodePtr
stream_switch_app
=
kernel_graph_ptr
->
NewCNode
(
inputs
);
...
...
@@ -430,9 +473,9 @@ CNodePtr KernelAdjust::CreateEndOfSequenceOP(const std::shared_ptr<session::Kern
return
end_of_sequence_node
;
}
CNodePtr
KernelAdjust
::
CreateStreamAssignAddnOP
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
)
{
CNodePtr
KernelAdjust
::
CreateStreamAssignAddnOP
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
,
bool
cur_loop
)
{
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
selected_kernel_builder
=
CreateMngKernelBuilder
(
{
kOpFormat_DEFAULT
,
kOpFormat_DEFAULT
},
{
TypeId
::
kNumberTypeInt32
,
TypeId
::
kNumberTypeInt32
});
...
...
@@ -442,7 +485,12 @@ CNodePtr KernelAdjust::CreateStreamAssignAddnOP(
auto
assign_add
=
std
::
make_shared
<
Primitive
>
(
kAssignAddOpName
);
std
::
vector
<
AnfNodePtr
>
inputs
;
inputs
.
push_back
(
NewValueNode
(
assign_add
));
inputs
.
push_back
(
switch_loop_input
.
at
(
kLoopCountParamName
));
if
(
cur_loop
)
{
inputs
.
push_back
(
switch_loop_input
.
at
(
kCurLoopCountParamName
));
}
else
{
inputs
.
push_back
(
switch_loop_input
.
at
(
kNextLoopCountParamName
));
}
inputs
.
push_back
(
switch_loop_input
.
at
(
kOneParamName
));
CNodePtr
assign_add_one
=
kernel_graph_ptr
->
NewCNode
(
inputs
);
MS_EXCEPTION_IF_NULL
(
assign_add_one
);
...
...
@@ -454,8 +502,8 @@ CNodePtr KernelAdjust::CreateStreamAssignAddnOP(
AnfAlgo
::
SetNodeAttr
(
"input_names"
,
input_names_v
,
assign_add_one
);
AnfAlgo
::
SetNodeAttr
(
"output_names"
,
output_names_v
,
assign_add_one
);
selected_kernel_builder
.
SetKernelType
(
KernelType
::
TBE_KERNEL
);
MS_EXCEPTION_IF_NULL
(
switch_loop_input
.
at
(
kLoopCountParamName
));
assign_add_one
->
set_abstract
(
switch_loop_input
.
at
(
kLoopCountParamName
)
->
abstract
());
MS_EXCEPTION_IF_NULL
(
switch_loop_input
.
at
(
k
Cur
LoopCountParamName
));
assign_add_one
->
set_abstract
(
switch_loop_input
.
at
(
k
Cur
LoopCountParamName
)
->
abstract
());
return
assign_add_one
;
}
...
...
@@ -510,14 +558,23 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph
void
KernelAdjust
::
LoadSwitchInputs
(
std
::
vector
<
tensor
::
TensorPtr
>
*
inputs
)
{
MS_LOG
(
INFO
)
<<
"---------------- LoadSwitchInputs---"
;
MS_EXCEPTION_IF_NULL
(
inputs
);
// current loop count
std
::
vector
<
int
>
shp
=
{
1
};
tensor
::
TensorPtr
loop_count_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
loop_count_tensor
);
tensor
::
TensorPtr
cur_loop_count
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
cur_loop_count
);
int32_t
*
val
=
nullptr
;
val
=
static_cast
<
int32_t
*>
(
loop_count_tensor
->
data_c
());
val
=
static_cast
<
int32_t
*>
(
cur_loop_count
->
data_c
());
MS_EXCEPTION_IF_NULL
(
val
);
*
val
=
0
;
inputs
->
push_back
(
loop_count_tensor
);
inputs
->
push_back
(
cur_loop_count
);
// next loop count
tensor
::
TensorPtr
next_loop_count
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
next_loop_count
);
val
=
static_cast
<
int32_t
*>
(
next_loop_count
->
data_c
());
MS_EXCEPTION_IF_NULL
(
val
);
*
val
=
0
;
inputs
->
push_back
(
next_loop_count
);
// Epoch in device
tensor
::
TensorPtr
epoch_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
...
...
@@ -527,6 +584,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
*
val
=
0
;
inputs
->
push_back
(
epoch_tensor
);
// total loop count per iter
tensor
::
TensorPtr
iter_loop_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
iter_loop_tensor
);
val
=
static_cast
<
int32_t
*>
(
iter_loop_tensor
->
data_c
());
...
...
@@ -535,13 +593,6 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
MS_LOG
(
INFO
)
<<
"iter_loop_tensor = "
<<
*
val
;
inputs
->
push_back
(
iter_loop_tensor
);
tensor
::
TensorPtr
zero_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
zero_tensor
);
val
=
static_cast
<
int32_t
*>
(
zero_tensor
->
data_c
());
MS_EXCEPTION_IF_NULL
(
val
);
*
val
=
0
;
inputs
->
push_back
(
zero_tensor
);
tensor
::
TensorPtr
one_tensor
=
std
::
make_shared
<
tensor
::
Tensor
>
(
kInt32
->
type_id
(),
shp
);
MS_EXCEPTION_IF_NULL
(
one_tensor
);
val
=
static_cast
<
int32_t
*>
(
one_tensor
->
data_c
());
...
...
mindspore/ccsrc/runtime/device/kernel_adjust.h
浏览文件 @
66e7b02b
...
...
@@ -33,13 +33,19 @@
using
mindspore
::
device
::
ascend
::
ProfilingTraceInfo
;
using
mindspore
::
device
::
ascend
::
ProfilingUtils
;
namespace
mindspore
{
constexpr
auto
kLoopCountParamName
=
"loop_count"
;
constexpr
auto
kCurLoopCountParamName
=
"cur_loop_count"
;
constexpr
auto
kNextLoopCountParamName
=
"next_loop_count"
;
constexpr
auto
kIterLoopParamName
=
"iter_loop"
;
constexpr
auto
kZeroParamName
=
"zero"
;
constexpr
auto
kOneParamName
=
"one"
;
constexpr
auto
kEpochParamName
=
"loop_epoch"
;
constexpr
auto
kStreamNeedActivedFirst
=
"stream_need_active_first"
;
constexpr
uint32_t
kSecondStreamSwitchLabel
=
2
;
enum
StreamSwitchKind
{
kFpBpStreamSwitch
=
0
,
kGetNextStreamSwitch
=
1
,
kEosStreamSwitch
=
2
,
kIndependentStreamSwitch
=
3
};
namespace
device
{
class
KernelAdjust
{
...
...
@@ -65,18 +71,22 @@ class KernelAdjust {
void
CreateSwitchOpParameters
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
*
switch_loop_input
);
CNodePtr
CreateStreamSwitchOp
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
);
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
,
StreamSwitchKind
kind
);
CNodePtr
CreatTupleGetItemNode
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
CNodePtr
&
node
,
size_t
output_idx
);
CNodePtr
CreateEndOfSequenceOP
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
CNodePtr
&
getnext_cnode
);
CNodePtr
CreateStreamAssignAddnOP
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
);
const
std
::
map
<
std
::
string
,
mindspore
::
ParameterPtr
>
&
switch_loop_input
,
bool
cur_loop
);
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
CreateMngKernelBuilder
(
const
std
::
vector
<
std
::
string
>
&
formats
,
const
std
::
vector
<
TypeId
>
&
type_ids
);
void
LoadSwitchInputs
(
std
::
vector
<
tensor
::
TensorPtr
>
*
inputs
);
void
InsertProfilingKernel
(
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
kernel_graph_ptr
);
bool
ExitIndependent
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
);
};
}
// namespace device
}
// namespace mindspore
...
...
mindspore/ccsrc/runtime/device/kernel_runtime.cc
浏览文件 @
66e7b02b
...
...
@@ -576,6 +576,14 @@ void KernelRuntime::AssignNodeOutputMem(MemType type, const AnfNodePtr &node, in
MS_LOG
(
INFO
)
<<
"GetNext disable mem_reuse"
;
type
=
kDynamicMem
;
}
if
(
node
->
isa
<
CNode
>
())
{
bool
independent
=
AnfAlgo
::
IsIndependentNode
(
node
->
cast
<
CNodePtr
>
());
if
(
independent
&&
type
==
kReuseDynamicMem
)
{
MS_LOG
(
INFO
)
<<
"Independent disable mem_reuse"
;
type
=
kDynamicMem
;
}
}
auto
kernel_mod
=
AnfAlgo
::
GetKernelMod
(
node
);
MS_EXCEPTION_IF_NULL
(
kernel_mod
);
auto
output_sizes
=
kernel_mod
->
GetOutputSizeList
();
...
...
mindspore/ccsrc/utils/utils.h
浏览文件 @
66e7b02b
...
...
@@ -210,6 +210,7 @@ constexpr auto kAttrDataType = "data_type";
constexpr
auto
kAttrActiveTarget
=
"active_target"
;
constexpr
auto
kAttrActiveStreamList
=
"active_stream_list"
;
constexpr
auto
kAttrTrueBranchStream
=
"true_branch_stream"
;
constexpr
auto
kAttrStreamSwitchKind
=
"stream_switch_kind"
;
constexpr
auto
kAttrEventId
=
"event_id"
;
constexpr
auto
kAttrDynInput
=
"dynamic"
;
constexpr
auto
kAttrDynInputSizes
=
"dyn_input_sizes"
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录