Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
effdb483
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
effdb483
编写于
4月 07, 2020
作者:
J
jojobugfree
提交者:
jojo
4月 08, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
profiling feature enhancement
上级
315036b1
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
190 addition
and
181 deletion
+190
-181
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
+1
-1
mindspore/ccsrc/device/ascend/profiling/profiling_manager.h
mindspore/ccsrc/device/ascend/profiling/profiling_manager.h
+0
-4
mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
+105
-124
mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
+66
-27
mindspore/ccsrc/device/kernel_adjust.cc
mindspore/ccsrc/device/kernel_adjust.cc
+12
-19
mindspore/ccsrc/device/kernel_adjust.h
mindspore/ccsrc/device/kernel_adjust.h
+3
-3
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
.../ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+1
-1
mindspore/ccsrc/session/ascend_session.cc
mindspore/ccsrc/session/ascend_session.cc
+1
-1
tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
+1
-1
未找到文件。
mindspore/ccsrc/device/ascend/ascend_stream_assign.cc
浏览文件 @
effdb483
...
...
@@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr<mindspore::session
<<
AnfAlgo
::
GetStreamId
(
cur_cnode_ptr
)
<<
"], event_id["
<<
GetValue
<
uint32_t
>
(
primitive
->
GetAttr
(
kAttrEventId
))
<<
"]"
;
}
else
{
MS_LOG
(
INFO
)
<<
"node name["
<<
AnfAlgo
::
GetCNodeName
(
cur_cnode_ptr
)
<<
"], logic id["
MS_LOG
(
INFO
)
<<
"node name["
<<
cur_cnode_ptr
->
fullname_with_scope
(
)
<<
"], logic id["
<<
AnfAlgo
::
GetStreamDistinctionLabel
(
cur_cnode_ptr
.
get
())
<<
"], stream id["
<<
AnfAlgo
::
GetStreamId
(
cur_cnode_ptr
)
<<
"]"
;
}
...
...
mindspore/ccsrc/device/ascend/profiling/profiling_manager.h
浏览文件 @
effdb483
...
...
@@ -29,10 +29,6 @@ namespace ascend {
// PROFILING_CUSTOM_LOGID_START 3
const
uint64_t
kProfilingFpStartLogId
=
1
;
const
uint64_t
kProfilingBpEndLogId
=
2
;
const
uint64_t
kProfilingAllReduce1Start
=
3
;
const
uint64_t
kProfilingAllReduce1End
=
4
;
const
uint64_t
kProfilingAllReduce2Start
=
5
;
const
uint64_t
kProfilingAllReduce2End
=
6
;
const
uint64_t
kProfilingIterEndLogId
=
255
;
class
ProfilingEngineImpl
;
...
...
mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
浏览文件 @
effdb483
...
...
@@ -14,10 +14,8 @@
* limitations under the License.
*/
#include "device/ascend/profiling/profiling_utils.h"
#include <map>
#include "device/ascend/profiling/profiling_utils.h"
#include "kernel/kernel.h"
#include "device/ascend/profiling/profiling_manager.h"
#include "session/anf_runtime_algorithm.h"
...
...
@@ -27,82 +25,61 @@
namespace
mindspore
{
namespace
device
{
namespace
ascend
{
const
char
ProfilingUtils
::
kProfiling
[]
=
"Profiling"
;
const
char
ProfilingUtils
::
kNotify
[]
=
"notify"
;
const
char
ProfilingUtils
::
kProfilerTraceId
[]
=
"profiler_trace_id"
;
const
char
ProfilingUtils
::
kFlags
[]
=
"flags"
;
constexpr
uint32_t
kMaxProfilingNodeNum
=
100
;
constexpr
char
kCustomNode
[]
=
"PROFILING_CUSTOM_"
;
constexpr
char
kFpStartNode
[]
=
"PROFILING_FP_START"
;
constexpr
char
kBpEndNode
[]
=
"PROFILING_BP_END"
;
constexpr
char
kIterEndNode
[]
=
"PROFILING_ITER_END"
;
std
::
unordered_map
<
uint32_t
,
std
::
vector
<
std
::
string
>>
ProfilingUtils
::
graph_kernel_name_
;
bool
ProfilingUtils
::
GetProfilingTraceInfo
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
ProfilingTraceInfo
*
profiling_trace_info
)
{
MS_EXCEPTION_IF_NULL
(
profiling_trace_info
);
MS_EXCEPTION_IF_NULL
(
graph_ptr
);
bool
find_begin
=
false
;
bool
first_allreduce
=
true
;
for
(
const
auto
&
anf_node
:
graph_ptr
->
execution_order
())
{
if
(
anf_node
->
isa
<
CNode
>
())
{
const
std
::
string
kernel_name
=
AnfAlgo
::
GetCNodeName
(
anf_node
);
if
((
kernel_name
==
"Cast"
||
kernel_name
==
"Four2Five"
)
&&
!
find_begin
)
{
profiling_trace_info
->
profiling_trace_begin
=
anf_node
->
fullname_with_scope
();
find_begin
=
true
;
}
if
(
kernel_name
==
"Conv2DBackpropFilter"
)
{
profiling_trace_info
->
profiling_trace_bp_end
=
anf_node
->
fullname_with_scope
();
}
if
(
kernel_name
==
kFusedMulApplyMomentumOpName
||
kernel_name
==
kApplyMomentumOpName
)
{
profiling_trace_info
->
profiling_trace_netoutput
=
anf_node
->
fullname_with_scope
();
}
if
(
kernel_name
==
kAllReduceOpName
)
{
if
(
first_allreduce
)
{
profiling_trace_info
->
profiling_allreduce1_start
=
anf_node
->
fullname_with_scope
();
profiling_trace_info
->
profiling_allreduce1_end
=
anf_node
->
fullname_with_scope
();
first_allreduce
=
false
;
}
else
{
profiling_trace_info
->
profiling_allreduce2_start
=
anf_node
->
fullname_with_scope
();
profiling_trace_info
->
profiling_allreduce2_end
=
anf_node
->
fullname_with_scope
();
}
}
uint32_t
ProfilingUtils
::
custom_node_index_
=
1
;
ProfilingTraceInfo
ProfilingUtils
::
GetProfilingTraceFromEnv
(
NotNull
<
session
::
KernelGraph
*>
graph_ptr
)
{
MS_LOG
(
INFO
)
<<
"get env start"
;
custom_node_index_
=
1
;
auto
&
cnode_exec_order
=
graph_ptr
->
execution_order
();
ProfilingTraceInfo
profiling_trace
;
profiling_trace
.
trace_begin
=
GetTraceBegin
(
cnode_exec_order
);
profiling_trace
.
trace_bp_end
=
GetTraceBpEnd
();
profiling_trace
.
trace_netoutput
=
GetTraceNetoutput
(
cnode_exec_order
);
MS_LOG
(
INFO
)
<<
"[profiling] trace_begin:"
<<
profiling_trace
.
trace_begin
<<
" trace_bp_end:"
<<
profiling_trace
.
trace_bp_end
<<
" trace_netoutput:"
<<
profiling_trace
.
trace_netoutput
;
for
(
uint32_t
i
=
1
;
i
<=
kMaxProfilingNodeNum
;
++
i
)
{
std
::
string
env_str
=
std
::
string
(
kCustomNode
)
+
std
::
to_string
(
i
);
const
char
*
node_full_name
=
std
::
getenv
(
env_str
.
c_str
());
if
(
node_full_name
==
nullptr
)
{
break
;
}
MS_LOG
(
INFO
)
<<
"Get profiling node:"
<<
node_full_name
;
profiling_trace
.
trace_custom_node
.
insert
(
node_full_name
);
}
MS_LOG
(
INFO
)
<<
"[profiling]begin:"
<<
profiling_trace_info
->
profiling_trace_begin
<<
", net_output:"
<<
profiling_trace_info
->
profiling_trace_netoutput
<<
", end:"
<<
profiling_trace_info
->
profiling_trace_bp_end
<<
", allreduce1:"
<<
profiling_trace_info
->
profiling_allreduce1_start
<<
", allreduce2:"
<<
profiling_trace_info
->
profiling_allreduce2_start
;
return
profiling_trace_info
->
IsValid
();
MS_LOG
(
INFO
)
<<
"get env end"
;
return
profiling_trace
;
}
bool
ProfilingUtils
::
GetNetOutput
(
AnfNodePtr
anf_node
,
std
::
string
*
profiling_trace_net_output
)
{
MS_EXCEPTION_IF_NULL
(
anf_node
);
MS_EXCEPTION_IF_NULL
(
profiling_trace_net_output
);
MS_LOG
(
INFO
)
<<
"[profiling]Anf node's full name with scope:"
<<
anf_node
->
fullname_with_scope
();
if
(
!
profiling_trace_net_output
->
empty
())
{
MS_LOG
(
INFO
)
<<
"[profiling]Has got the net_output:"
<<
profiling_trace_net_output
->
c_str
();
return
true
;
}
if
(
AnfAlgo
::
IsRealKernel
(
anf_node
))
{
*
profiling_trace_net_output
=
anf_node
->
fullname_with_scope
();
return
true
;
}
std
::
string
ProfilingUtils
::
GetTraceBegin
(
const
std
::
vector
<
CNodePtr
>
&
cnode_exec_order
)
{
const
char
*
trace_begin
=
std
::
getenv
(
kFpStartNode
);
auto
&
first_cnode
=
cnode_exec_order
.
front
();
MS_EXCEPTION_IF_NULL
(
first_cnode
);
return
trace_begin
==
nullptr
?
first_cnode
->
fullname_with_scope
()
:
std
::
string
(
trace_begin
);
}
auto
cnode
=
anf_node
->
cast
<
CNodePtr
>
();
if
(
cnode
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"[profiling]Anf node should be a CNode"
;
return
false
;
}
// Reads the backpropagation-end trace point from the environment variable named by kBpEndNode.
// Returns an empty string when the variable is not set.
std::string ProfilingUtils::GetTraceBpEnd() {
  const char *env_value = std::getenv(kBpEndNode);
  if (env_value == nullptr) {
    return "";
  }
  return std::string(env_value);
}
auto
inputs
=
cnode
->
inputs
();
auto
input_size
=
inputs
.
size
();
if
(
input_size
<
2
)
{
MS_LOG
(
ERROR
)
<<
"[profiling]Anf node' input size("
<<
input_size
<<
") < 2, don't support get apply kernel node."
;
return
false
;
}
return
GetNetOutput
(
inputs
[
1
],
profiling_trace_net_output
);
std
::
string
ProfilingUtils
::
GetTraceNetoutput
(
const
std
::
vector
<
CNodePtr
>
&
cnode_exec_order
)
{
const
char
*
trace_netoutput
=
std
::
getenv
(
kIterEndNode
);
auto
&
last_cnode
=
cnode_exec_order
.
back
();
MS_EXCEPTION_IF_NULL
(
last_cnode
);
return
trace_netoutput
==
nullptr
?
last_cnode
->
fullname_with_scope
()
:
std
::
string
(
trace_netoutput
);
}
CNodePtr
ProfilingUtils
::
CreateProfilingCNode
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
bool
notify
,
uint64_t
profiler_trace_id
,
uint32_t
flags
)
{
MS_EXCEPTION_IF_NULL
(
graph_ptr
);
NotNull
<
CNodePtr
>
ProfilingUtils
::
CreateProfilingCNode
(
const
ProfilingContent
&
profiling_content
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
)
{
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
selected_kernel_builder
;
selected_kernel_builder
.
SetInputsFormat
({
kOpFormat_DEFAULT
,
kOpFormat_DEFAULT
});
selected_kernel_builder
.
SetInputsDeviceType
({
TypeId
::
kNumberTypeInt32
,
TypeId
::
kNumberTypeInt32
});
...
...
@@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::Ker
AnfAlgo
::
SetSelectKernelBuildInfo
(
selected_kernel_builder
.
Build
(),
cnode_ptr
.
get
());
cnode_ptr
->
set_abstract
(
type_none_abstract
);
// set attr
ValuePtr
notify_value
=
MakeValue
(
notify
);
ValuePtr
trace_id_value
=
MakeValue
(
profiler_trace_id
);
ValuePtr
flags_value
=
MakeValue
(
flags
);
ValuePtr
notify_value
=
MakeValue
(
profiling_content
.
notify
);
ValuePtr
trace_id_value
=
MakeValue
(
profil
ing_content
.
profil
er_trace_id
);
ValuePtr
flags_value
=
MakeValue
(
profiling_content
.
flags
);
AnfAlgo
::
SetNodeAttr
(
ProfilingUtils
::
kNotify
,
notify_value
,
cnode_ptr
);
AnfAlgo
::
SetNodeAttr
(
ProfilingUtils
::
kProfilerTraceId
,
trace_id_value
,
cnode_ptr
);
AnfAlgo
::
SetNodeAttr
(
ProfilingUtils
::
kFlags
,
flags_value
,
cnode_ptr
);
return
cnode_ptr
;
return
NOT_NULL
(
cnode_ptr
)
;
}
void
ProfilingUtils
::
ProfilingTraceFpStart
(
const
std
::
shared_ptr
<
mindspore
::
session
::
KernelGraph
>
&
graph_ptr
,
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
mindspore
::
device
::
ascend
::
ProfilingTraceInfo
&
profiling_trace_info
,
std
::
vector
<
mindspore
::
CNodePtr
>
*
kernel_list
)
{
if
(
profiling_trace_info
.
IsValid
()
&&
profiling_trace_info
.
profiling_trace_begin
==
anf_node
->
fullname_with_scope
())
{
if
(
graph_ptr
==
nullptr
||
kernel_list
==
nullptr
||
anf_node
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"[profiling]input param invalid"
;
return
;
}
void
ProfilingUtils
::
ProfilingTraceFpStart
(
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
mindspore
::
CNodePtr
>
*>
kernel_list
)
{
if
(
profiling_trace_info
.
trace_begin
==
anf_node
->
fullname_with_scope
())
{
auto
job_id
=
ProfilingManager
::
GetInstance
().
GetJobId
();
// job task info
CNodePtr
job_kernel_ptr
=
CreateProfilingCNode
(
graph_ptr
,
false
,
job_id
,
0
);
AnfAlgo
::
SetStreamDistinctionLabel
(
AnfAlgo
::
GetStreamDistinctionLabel
(
anf_node
.
get
()),
job_kernel_ptr
.
get
());
AnfAlgo
::
SetStreamId
(
AnfAlgo
::
GetStreamId
(
anf_node
),
job_kernel_ptr
.
get
());
// fp task info
CNodePtr
start_kernel_ptr
=
CreateProfilingCNode
(
graph_ptr
,
false
,
kProfilingFpStartLogId
,
0
);
AnfAlgo
::
SetStreamDistinctionLabel
(
AnfAlgo
::
GetStreamDistinctionLabel
(
anf_node
.
get
()),
start_kernel_ptr
.
get
());
AnfAlgo
::
SetStreamId
(
AnfAlgo
::
GetStreamId
(
anf_node
),
start_kernel_ptr
.
get
());
kernel_list
->
emplace_back
(
job_kernel_ptr
);
kernel_list
->
emplace_back
(
start_kernel_ptr
);
ProfilingContent
job_profiling_context
=
{
false
,
job_id
,
0
};
auto
job_profiling_node
=
CreateProfilingCNodeWithStream
(
anf_node
,
job_profiling_context
,
graph_ptr
);
kernel_list
->
emplace_back
(
job_profiling_node
);
ProfilingContent
fp_profiling_content
=
{
false
,
kProfilingFpStartLogId
,
0
};
auto
fp_profiling_node
=
CreateProfilingCNodeWithStream
(
anf_node
,
fp_profiling_content
,
graph_ptr
);
kernel_list
->
emplace_back
(
fp_profiling_node
);
}
}
void
ProfilingUtils
::
ProfilingAllReduce
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
const
AnfNodePtr
&
anf_node
,
int
job_id
,
const
std
::
string
&
profiling_node_name
,
std
::
vector
<
CNodePtr
>
*
kernel_list
)
{
MS_EXCEPTION_IF_NULL
(
graph_ptr
);
// Creates a profiling node from profiling_content and copies the stream distinction label
// and the stream id of anf_node onto it.
CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
                                                        const ProfilingContent &profiling_content,
                                                        NotNull<session::KernelGraph *> graph_ptr) {
  CNodePtr new_node = CreateProfilingCNode(profiling_content, graph_ptr);
  // Copy the stream distinction label first, then the stream id, both taken from anf_node.
  const auto stream_label = AnfAlgo::GetStreamDistinctionLabel(anf_node.get());
  AnfAlgo::SetStreamDistinctionLabel(stream_label, new_node.get());
  const auto stream_id = AnfAlgo::GetStreamId(anf_node);
  AnfAlgo::SetStreamId(stream_id, new_node.get());
  return new_node;
}
void
ProfilingUtils
::
ProfilingCustomOp
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
CNodePtr
>
*>
kernel_list
)
{
MS_EXCEPTION_IF_NULL
(
anf_node
);
MS_EXCEPTION_IF_NULL
(
kernel_list
);
auto
full_scope_name
=
anf_node
->
fullname_with_scope
();
if
(
profiling_node_name
==
full_scope_name
)
{
CNodePtr
allreduce_kernel_ptr
=
CreateProfilingCNode
(
graph_ptr
,
false
,
job_id
,
0
);
AnfAlgo
::
SetStreamDistinctionLabel
(
AnfAlgo
::
GetStreamDistinctionLabel
(
anf_node
.
get
()),
allreduce_kernel_ptr
.
get
());
AnfAlgo
::
SetStreamId
(
AnfAlgo
::
GetStreamId
(
anf_node
),
allreduce_kernel_ptr
.
get
());
kernel_list
->
emplace_back
(
allreduce_kernel_ptr
);
auto
iter
=
profiling_trace_info
.
trace_custom_node
.
find
(
anf_node
->
fullname_with_scope
());
if
(
iter
==
profiling_trace_info
.
trace_custom_node
.
end
())
{
return
;
}
// custom op profiling job start from 3.
ProfilingContent
front_profiling_content
=
{
false
,
2
*
custom_node_index_
+
1
,
0
};
CNodePtr
front_node
=
CreateProfilingCNodeWithStream
(
anf_node
,
front_profiling_content
,
graph_ptr
);
kernel_list
->
insert
(
kernel_list
->
end
()
-
1
,
front_node
);
ProfilingContent
back_profiling_content
=
{
false
,
2
*
custom_node_index_
+
2
,
0
};
CNodePtr
back_node
=
CreateProfilingCNodeWithStream
(
anf_node
,
back_profiling_content
,
graph_ptr
);
kernel_list
->
insert
(
kernel_list
->
end
(),
back_node
);
++
custom_node_index_
;
}
void
ProfilingUtils
::
ProfilingTraceEnd
(
const
std
::
shared_ptr
<
mindspore
::
session
::
KernelGraph
>
&
graph_ptr
,
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
mindspore
::
device
::
ascend
::
ProfilingTraceInfo
&
profiling_trace_info
,
std
::
vector
<
mindspore
::
CNodePtr
>
*
kernel_list
)
{
MS_EXCEPTION_IF_NULL
(
graph_ptr
);
void
ProfilingUtils
::
ProfilingTraceBpEnd
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
CNodePtr
>
*>
kernel_list
)
{
MS_EXCEPTION_IF_NULL
(
anf_node
);
MS_EXCEPTION_IF_NULL
(
kernel_list
);
if
(
profiling_trace_info
.
IsValid
())
{
auto
full_scope_name
=
anf_node
->
fullname_with_scope
();
if
(
profiling_trace_info
.
profiling_trace_netoutput
==
full_scope_name
)
{
CNodePtr
bp_kernel_ptr
=
CreateProfilingCNode
(
graph_ptr
,
true
,
kProfilingIterEndLogId
,
0
);
AnfAlgo
::
SetStreamDistinctionLabel
(
AnfAlgo
::
GetStreamDistinctionLabel
(
anf_node
.
get
()),
bp_kernel_ptr
.
get
());
AnfAlgo
::
SetStreamId
(
AnfAlgo
::
GetStreamId
(
anf_node
),
bp_kernel_ptr
.
get
());
kernel_list
->
emplace_back
(
bp_kernel_ptr
);
}
if
(
profiling_trace_info
.
trace_bp_end
==
anf_node
->
fullname_with_scope
())
{
ProfilingContent
bp_end_profiling_content
=
{
false
,
kProfilingBpEndLogId
,
0
};
CNodePtr
bp_end_node
=
CreateProfilingCNodeWithStream
(
anf_node
,
bp_end_profiling_content
,
graph_ptr
);
kernel_list
->
emplace_back
(
bp_end_node
);
}
}
if
(
profiling_trace_info
.
profiling_trace_bp_end
==
full_scope_name
)
{
CNodePtr
end_task_info
=
CreateProfilingCNode
(
graph_ptr
,
false
,
kProfilingBpEndLogId
,
0
);
AnfAlgo
::
SetStreamDistinctionLabel
(
AnfAlgo
::
GetStreamDistinctionLabel
(
anf_node
.
get
()),
end_task_info
.
get
());
AnfAlgo
::
SetStreamId
(
AnfAlgo
::
GetStreamId
(
anf_node
),
end_task_info
.
get
());
kernel_list
->
emplace_back
(
end_task_info
);
}
void
ProfilingUtils
::
ProfilingTraceEnd
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
mindspore
::
CNodePtr
>
*>
kernel_list
)
{
MS_EXCEPTION_IF_NULL
(
anf_node
);
auto
full_scope_name
=
anf_node
->
fullname_with_scope
();
if
(
profiling_trace_info
.
trace_netoutput
==
full_scope_name
)
{
ProfilingContent
bp_end_profiling_content
=
{
true
,
kProfilingIterEndLogId
,
0
};
CNodePtr
bp_kernel_ptr
=
CreateProfilingCNodeWithStream
(
anf_node
,
bp_end_profiling_content
,
graph_ptr
);
kernel_list
->
emplace_back
(
bp_kernel_ptr
);
}
}
...
...
mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
浏览文件 @
effdb483
...
...
@@ -19,63 +19,102 @@
#include <memory>
#include <string>
#include <vector>
#include <set>
#include <unordered_map>
#include "session/kernel_graph.h"
#include "utils/contract.h"
namespace
mindspore
{
namespace
device
{
namespace
ascend
{
struct
ProfilingTraceInfo
{
// execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...)
std
::
string
profiling_
trace_begin
;
std
::
string
trace_begin
;
// last backpropagation op in the execution order (like: Conv2DBackpropFilter): fp -> bp_end
std
::
string
profiling_
trace_bp_end
;
std
::
string
trace_bp_end
;
// last op to execute in the graph (net output), which marks the end of an iteration
std
::
string
profiling_
trace_netoutput
;
std
::
string
trace_netoutput
;
std
::
string
profiling_allreduce1_start
;
std
::
string
profiling_allreduce1_end
;
std
::
string
profiling_allreduce2_start
;
std
::
string
profiling_allreduce2_end
;
// profiling specific op, such as AllReduce;
std
::
set
<
std
::
string
>
trace_custom_node
;
// 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty.
// 2. op launch gets task info with callback func.
// 3. insert profiling_trace_bp_end.
// 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty.
bool
IsValid
()
const
{
return
!
(
profiling_trace_begin
.
empty
()
||
profiling_trace_bp_end
.
empty
());
}
bool
IsValid
()
const
{
return
!
(
trace_begin
.
empty
()
||
trace_bp_end
.
empty
()
||
trace_netoutput
.
empty
());
}
};
// Values carried by one profiling node; each field is stored as a node attribute
// when the profiling node is created.
struct
ProfilingContent
{
// true - send data from device to host and finish profiling
bool
notify
;
// Trace id stored in the node's "profiler_trace_id" attribute.
uint64_t
profiler_trace_id
;
// Value stored in the node's "flags" attribute.
uint32_t
flags
;
};
class
ProfilingUtils
{
public:
ProfilingUtils
()
=
default
;
~
ProfilingUtils
()
=
default
;
static
bool
GetProfilingTraceInfo
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
ProfilingTraceInfo
*
profiling_trace_info
);
static
void
ProfilingTraceFpStart
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
std
::
vector
<
CNodePtr
>
*
kernel_list
);
static
void
ProfilingAllReduce
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
const
AnfNodePtr
&
anf_node
,
int
job_id
,
const
std
::
string
&
profiling_node_name
,
std
::
vector
<
CNodePtr
>
*
kernel_list
);
static
void
ProfilingTraceEnd
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
std
::
vector
<
CNodePtr
>
*
kernel_list
);
// Insert job_id profiling node and fp_start profiling node.
// Job_id is obtained from envs and should be a number greater than 255.
// Fp_start node should be inserted at the start of a network, and its log_id is hard-coded to 1.
static
void
ProfilingTraceFpStart
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
CNodePtr
>
*>
kernel_list
);
// Insert net output profiling node, which tells the device to stop profiling.
// The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
static
void
ProfilingTraceEnd
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
CNodePtr
>
*>
kernel_list
);
// Insert bp_end profiling node, which should be inserted after the last backpropagation CNode in the network.
static
void
ProfilingTraceBpEnd
(
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
mindspore
::
CNodePtr
>
*>
kernel_list
);
// Mapping graph id and the kernels' name in the graph
static
void
SetGraphKernelName
(
uint32_t
graph_id
,
const
std
::
vector
<
std
::
string
>
&
kernel_names
);
// Mapping task_id and kernel name for device to generate the time cost of specific kernel.
// Device calculate the time cost of the task which is marked by task id.
// But we need data of (kernel name , time cost)
static
void
ReportProfilingData
(
uint32_t
graph_id
,
const
std
::
vector
<
uint32_t
>
&
task_ids
);
static
const
char
kProfiling
[];
static
const
char
kNotify
[];
static
const
char
kProfilerTraceId
[];
static
const
char
kFlags
[];
// Get profiling trace point from envs.
// export PROFILING_FP_START='full name of the first cnode to execute'
// export PROFILING_BP_END='full name of the last backpropagation cnode to execute'
// export PROFILING_ITER_END='full name of last cnode in graph to execute'
// And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
// GetNext, export PROFILING_CUSTOM_2='full name of GetNext cnode'
// The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
static
ProfilingTraceInfo
GetProfilingTraceFromEnv
(
NotNull
<
session
::
KernelGraph
*>
graph_ptr
);
// Insert two profiling trace points, one in front and one behind
static
void
ProfilingCustomOp
(
const
mindspore
::
AnfNodePtr
&
anf_node
,
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
,
NotNull
<
std
::
vector
<
mindspore
::
CNodePtr
>
*>
kernel_list
);
inline
static
constexpr
char
kProfiling
[]
=
"Profiling"
;
inline
static
constexpr
char
kNotify
[]
=
"notify"
;
inline
static
constexpr
char
kProfilerTraceId
[]
=
"profiler_trace_id"
;
inline
static
constexpr
char
kFlags
[]
=
"flags"
;
private:
static
bool
GetNetOutput
(
AnfNodePtr
anf_node
,
std
::
string
*
profiling_trace_net_output
);
static
CNodePtr
CreateProfilingCNode
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
graph_ptr
,
bool
notify
,
uint64_t
profiler_trace_id
,
uint32_t
flags
);
static
NotNull
<
CNodePtr
>
CreateProfilingCNode
(
const
ProfilingContent
&
profiling_content
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
);
static
CNodePtr
CreateProfilingCNodeWithStream
(
const
AnfNodePtr
&
anf_node
,
const
ProfilingContent
&
profiling_content
,
NotNull
<
session
::
KernelGraph
*>
graph_ptr
);
static
std
::
string
GetTraceBegin
(
const
std
::
vector
<
CNodePtr
>
&
cnode_exec_order
);
static
std
::
string
GetTraceBpEnd
();
static
std
::
string
GetTraceNetoutput
(
const
std
::
vector
<
CNodePtr
>
&
cnode_exec_order
);
// graph id --> (kernel name list)
static
std
::
unordered_map
<
uint32_t
,
std
::
vector
<
std
::
string
>>
graph_kernel_name_
;
static
uint32_t
custom_node_index_
;
};
}
// namespace ascend
}
// namespace device
...
...
mindspore/ccsrc/device/kernel_adjust.cc
浏览文件 @
effdb483
...
...
@@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
MS_LOG
(
INFO
)
<<
"---------------- LoadSwitchInputs End--"
;
}
void
KernelAdjust
::
Profiling
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
)
{
void
KernelAdjust
::
Profiling
(
NotNull
<
session
::
KernelGraph
*>
kernel_graph_ptr
)
{
if
(
!
ascend
::
ProfilingManager
::
GetInstance
().
IsProfiling
())
{
MS_LOG
(
INFO
)
<<
"No need to profiling"
;
return
;
}
ProfilingTraceInfo
profiling_trace_info
;
if
(
ProfilingUtils
::
GetProfilingTraceInfo
(
kernel_graph_ptr
,
&
profiling_trace_info
))
{
InsertProfilingKernel
(
kernel_graph_ptr
,
profiling_trace_info
);
}
else
{
MS_LOG
(
WARNING
)
<<
"[profiling] GetProfilingTraceInfo failed"
;
ProfilingTraceInfo
profiling_trace_info
=
ProfilingUtils
::
GetProfilingTraceFromEnv
(
kernel_graph_ptr
);
if
(
!
profiling_trace_info
.
IsValid
())
{
MS_LOG
(
WARNING
)
<<
"[profiling] no profiling node found!"
;
return
;
}
InsertProfilingKernel
(
profiling_trace_info
,
kernel_graph_ptr
);
}
void
KernelAdjust
::
InsertProfilingKernel
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
ProfilingTraceInfo
&
profiling_trace_info
)
{
void
KernelAdjust
::
InsertProfilingKernel
(
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
kernel_graph_ptr
)
{
MS_LOG
(
INFO
)
<<
"[profiling] Insert profiling kernel start"
;
MS_EXCEPTION_IF_NULL
(
kernel_graph_ptr
);
if
(
!
profiling_trace_info
.
IsValid
())
{
MS_LOG
(
WARNING
)
<<
"Profiling trace point not found"
;
return
;
...
...
@@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGr
std
::
vector
<
CNodePtr
>
new_cnode_list
;
std
::
vector
<
CNodePtr
>
cnode_ptr_list
=
kernel_graph_ptr
->
execution_order
();
for
(
const
auto
&
cnode_ptr
:
cnode_ptr_list
)
{
ProfilingUtils
::
ProfilingTraceFpStart
(
kernel_graph_ptr
,
cnode_ptr
,
profiling_trace_info
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingAllReduce
(
kernel_graph_ptr
,
cnode_ptr
,
ascend
::
kProfilingAllReduce1Start
,
profiling_trace_info
.
profiling_allreduce1_start
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingAllReduce
(
kernel_graph_ptr
,
cnode_ptr
,
ascend
::
kProfilingAllReduce2Start
,
profiling_trace_info
.
profiling_allreduce2_start
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingTraceFpStart
(
cnode_ptr
,
profiling_trace_info
,
kernel_graph_ptr
,
NOT_NULL
(
&
new_cnode_list
));
new_cnode_list
.
emplace_back
(
cnode_ptr
);
ProfilingUtils
::
ProfilingAllReduce
(
kernel_graph_ptr
,
cnode_ptr
,
ascend
::
kProfilingAllReduce1End
,
profiling_trace_info
.
profiling_allreduce1_end
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingAllReduce
(
kernel_graph_ptr
,
cnode_ptr
,
ascend
::
kProfilingAllReduce2End
,
profiling_trace_info
.
profiling_allreduce2_end
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingTraceEnd
(
kernel_graph_ptr
,
cnode_ptr
,
profiling_trace_info
,
&
new_cnode_list
);
ProfilingUtils
::
ProfilingCustomOp
(
cnode_ptr
,
profiling_trace_info
,
kernel_graph_ptr
,
NOT_NULL
(
&
new_cnode_list
));
ProfilingUtils
::
ProfilingTraceBpEnd
(
cnode_ptr
,
profiling_trace_info
,
kernel_graph_ptr
,
NOT_NULL
(
&
new_cnode_list
));
ProfilingUtils
::
ProfilingTraceEnd
(
cnode_ptr
,
profiling_trace_info
,
kernel_graph_ptr
,
NOT_NULL
(
&
new_cnode_list
));
}
kernel_graph_ptr
->
set_execution_order
(
new_cnode_list
);
}
...
...
mindspore/ccsrc/device/kernel_adjust.h
浏览文件 @
effdb483
...
...
@@ -48,7 +48,7 @@ class KernelAdjust {
void
SetStreamSwitchOps
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
);
bool
StepLoadCtrlInputs
(
const
std
::
shared_ptr
<
session
::
Context
>
&
context
,
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
);
void
Profiling
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
);
void
Profiling
(
NotNull
<
session
::
KernelGraph
*>
kernel_graph_ptr
);
static
bool
NeedInsertSwitch
();
CNodePtr
CreateSteamActiveOp
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
);
...
...
@@ -66,8 +66,8 @@ class KernelAdjust {
kernel
::
KernelBuildInfo
::
KernelBuildInfoBuilder
CreateMngKernelBuilder
(
const
std
::
vector
<
std
::
string
>
&
formats
,
const
std
::
vector
<
TypeId
>
&
type_ids
);
void
LoadSwitchInputs
(
std
::
vector
<
tensor
::
TensorPtr
>
*
inputs
);
void
InsertProfilingKernel
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
,
const
ProfilingTraceInfo
&
profiling_trace_info
);
void
InsertProfilingKernel
(
const
ProfilingTraceInfo
&
profiling_trace_info
,
NotNull
<
session
::
KernelGraph
*>
kernel_graph_ptr
);
};
}
// namespace device
}
// namespace mindspore
...
...
mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
浏览文件 @
effdb483
...
...
@@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
kernel_graph
->
SetExecOrderByDefault
();
if
(
save_graphs
)
{
std
::
string
file_path
=
save_graphs_path
+
"/"
+
"hwopt_d_end.ir"
;
DumpIR
(
file_path
,
kernel_graph
);
DumpIR
(
file_path
,
kernel_graph
,
true
);
DumpIRProto
(
kernel_graph
,
"after_hwopt"
);
}
}
...
...
mindspore/ccsrc/session/ascend_session.cc
浏览文件 @
effdb483
...
...
@@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) {
// Assign streams for control sink and hccl and so on
AssignStream
(
graph
);
device
::
KernelAdjust
::
GetInstance
().
Profiling
(
graph
);
device
::
KernelAdjust
::
GetInstance
().
Profiling
(
NOT_NULL
(
graph
.
get
())
);
// build kernel if node is cnode
BuildKernel
(
graph
);
auto
ms_context
=
MsContext
::
GetInstance
();
...
...
tests/ut/cpp/stub/tasksink/ascend_stream_assign_stub.cc
浏览文件 @
effdb483
...
...
@@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::Context> &c
return
true
;
}
// Unit-test stub: unconditionally returns true.
bool
KernelAdjust
::
NeedInsertSwitch
()
{
return
true
;
}
void
KernelAdjust
::
Profiling
(
const
std
::
shared_ptr
<
session
::
KernelGraph
>
&
kernel_graph_ptr
)
{
return
;
}
// Unit-test stub: does nothing, the graph is left untouched.
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {}
}
// namespace device
}
// namespace mindspore
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录