Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
pingzhuyan
mindspore
提交
4149274b
M
mindspore
项目概览
pingzhuyan
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4149274b
编写于
8月 17, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 17, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4218 add data saver module for gpu profiler
Merge pull request !4218 from yelihua/temp-dev
上级
d1ad3367
31e61f71
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
497 addition
and
102 deletion
+497
-102
mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc
mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc
+1
-1
mindspore/ccsrc/profiler/device/gpu/data_saver.cc
mindspore/ccsrc/profiler/device/gpu/data_saver.cc
+223
-0
mindspore/ccsrc/profiler/device/gpu/data_saver.h
mindspore/ccsrc/profiler/device/gpu/data_saver.h
+153
-0
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
+6
-1
mindspore/profiler/parser/minddata_parser.py
mindspore/profiler/parser/minddata_parser.py
+13
-9
mindspore/profiler/profiling.py
mindspore/profiler/profiling.py
+101
-91
未找到文件。
mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc
浏览文件 @
4149274b
...
...
@@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea
}
// Thin wrapper that forwards to the CUPTI entry point named "cuptiGetDeviceId".
// The entry point is obtained by name through GetCUPTIFunc and kept in a function-local static.
//   context:  passed through unchanged to the resolved function.
//   deviceId: passed through unchanged to the resolved function (presumably an out-parameter
//             receiving the device id -- confirm against the CUPTI documentation).
// Returns whatever the resolved function returns.
CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) {
  // Only one lookup may exist here, and it must request "cuptiGetDeviceId": a second declaration
  // of func_ptr is a redefinition, and looking up "cuptiSubscribe" would cast an unrelated entry
  // point to CuptiGetDeviceIdFunc.
  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId"));
  return func_ptr(context, deviceId);
}
}
// namespace gpu
...
...
mindspore/ccsrc/profiler/device/gpu/data_saver.cc
0 → 100644
浏览文件 @
4149274b
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/gpu/data_saver.h"
#include <fstream>
#include <numeric>
#include "utils/log_adapter.h"
namespace
mindspore
{
namespace
profiler
{
namespace
gpu
{
// Builds the detail record of one operator from its raw profiling info.
//   op_info:    raw profiling record; must not be null, it is dereferenced immediately.
//   proportion: stored as-is in proportion_.
// Fills op_full_name_, op_name_, op_type_ and op_avg_time_ from op_info.
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
  op_full_name_ = op_info_->op_name;

  // op_name_ is everything after the last '/', or the whole name when there is no '/'.
  const auto last_slash = op_full_name_.rfind('/');
  const auto name_begin = (last_slash == std::string::npos) ? 0 : last_slash + 1;
  op_name_ = op_full_name_.substr(name_begin);

  // op_type_ is the part of op_name_ before its last '-'. Searching inside op_name_ (instead of
  // the full name) gives the whole op_name_ when no '-' follows the last '/', without relying on
  // unsigned wrap-around of the length argument.
  op_type_ = op_name_.substr(0, op_name_.rfind('-'));

  // Leave op_avg_time_ at its default of 0 when the operator was never counted, rather than
  // dividing by zero.
  if (op_info_->op_count > 0) {
    op_avg_time_ = op_info_->op_host_cost_time / op_info_->op_count;
  }
}
// Creates the statistics entry for a single activity event.
//   data: the event this entry is built from; kept in basic_info_ and dereferenced here.
// grid_dim_/block_dim_ are rendered as quoted "x,y,z" strings for kernel activities and stay
// empty for every other activity type. The entry starts with one occurrence whose duration
// is used for the total, average, maximum and minimum alike.
ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
  if (basic_info_->activity_type == ActivityType::kKernel) {
    grid_dim_ = "\"" + std::to_string(basic_info_->kernel_info.grid_x) + ',' +
                std::to_string(basic_info_->kernel_info.grid_y) + ',' +
                std::to_string(basic_info_->kernel_info.grid_z) + "\"";
    block_dim_ = "\"" + std::to_string(basic_info_->kernel_info.block_x) + ',' +
                 std::to_string(basic_info_->kernel_info.block_y) + ',' +
                 std::to_string(basic_info_->kernel_info.block_z) + "\"";
  } else {
    grid_dim_ = "";
    block_dim_ = "";
  }

  count_ = 1;
  // Elapsed time between the two time stamps, scaled by kTimeUnit.
  total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
  min_duration_ = total_duration_;
  max_duration_ = total_duration_;
  avg_duration_ = total_duration_;
}
// Merges the statistics of `other` into this entry and returns *this.
// count_ and total_duration_ are summed; max_duration_ and min_duration_ are widened to cover
// both entries. avg_duration_ is NOT recomputed here.
ActivityData &ActivityData::operator+=(const ActivityData &other) {
  count_ += other.count_;
  total_duration_ += other.total_duration_;
  // Compare extreme against extreme. `other` may itself be an aggregate whose total_duration_
  // is a sum, so the total must not be used as a candidate for max/min. The two checks are
  // independent because an aggregate can extend the range on both sides at once.
  if (other.max_duration_ > max_duration_) {
    max_duration_ = other.max_duration_;
  }
  if (other.min_duration_ < min_duration_) {
    min_duration_ = other.min_duration_;
  }
  return *this;
}
// Converts the raw per-operator records into op_detail_infos_ and aggregates them per operator
// type into op_type_infos_.
//   op_info_maps: raw operator records; each mapped OpInfo is copied into a shared_ptr.
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
  op_detail_infos_.reserve(op_info_maps.size());
  const float total_time_sum = GetTotalOpTime(op_info_maps);
  // Iterate by const reference: a by-value loop variable would copy every <name, OpInfo> pair.
  for (const auto &item : op_info_maps) {
    // Share of this operator in the summed host cost time; 0 when nothing was measured so the
    // output never contains the result of a division by zero.
    float proportion = 0;
    if (total_time_sum > 0) {
      proportion = item.second.op_host_cost_time / total_time_sum;
    }
    auto op_info = std::make_shared<OpInfo>(item.second);
    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
    op_detail_infos_.emplace_back(op_detail_info);
    AddOpDetailInfoForType(op_detail_info);
  }
  // update average time of op type
  for (auto &op_type : op_type_infos_) {
    // op_type: <op type name, OpType>
    if (op_type.second.count_ > 0) {
      op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
    }
  }
  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void
DataSaver
::
AddOpDetailInfoForType
(
const
OpDetailInfo
&
op_detail_info
)
{
// Construct OpType object according to op detail info
OpType
op_type
=
OpType
{
op_detail_info
.
op_type_
,
op_detail_info
.
op_info_
->
op_count
,
op_detail_info
.
op_info_
->
op_host_cost_time
,
0
,
op_detail_info
.
proportion_
};
// Set the OpType into op_type_infos_ map
std
::
string
type_name
=
op_detail_info
.
op_type_
;
auto
iter
=
op_type_infos_
.
find
(
type_name
);
if
(
iter
==
op_type_infos_
.
end
())
{
op_type_infos_
.
emplace
(
type_name
,
op_type
);
}
else
{
iter
->
second
+=
op_type
;
}
}
float
DataSaver
::
GetTotalOpTime
(
const
OpInfoMap
&
op_info_maps
)
{
float
sum
=
0
;
sum
=
std
::
accumulate
(
op_info_maps
.
begin
(),
op_info_maps
.
end
(),
sum
,
[](
float
i
,
auto
iter
)
{
return
i
+
iter
.
second
.
op_host_cost_time
;
});
MS_LOG
(
DEBUG
)
<<
"The total op time is "
<<
sum
;
return
sum
;
}
void
DataSaver
::
ParseEvent
(
const
std
::
vector
<
Event
>
&
events
)
{
// Put Kernel activity events into activity_infos_
for
(
const
auto
&
event
:
events
)
{
if
(
event
.
op_name
.
empty
()
||
event
.
api_type
!=
CUPTIApiType
::
kActivity
||
event
.
activity_type
!=
ActivityType
::
kKernel
)
{
continue
;
}
AddKernelEvent
(
event
);
}
// update average time of kernel op cost
for
(
auto
&
device_infos
:
activity_infos_
)
{
// device_infos: <device_id, DeviceActivityInfos>
for
(
auto
&
activity_info
:
device_infos
.
second
)
{
// activity_info: <kernel_name, Activity>
activity_info
.
second
.
avg_duration_
=
activity_info
.
second
.
total_duration_
/
activity_info
.
second
.
count_
;
}
MS_LOG
(
DEBUG
)
<<
"Get "
<<
device_infos
.
second
.
size
()
<<
" activity items for device:"
<<
device_infos
.
first
;
}
}
void
DataSaver
::
AddKernelEvent
(
const
Event
&
event
)
{
// Put kernel event to activity_infos according to device id
uint32_t
device_id
=
event
.
device_id
;
auto
iter
=
activity_infos_
.
find
(
device_id
);
if
(
iter
==
activity_infos_
.
end
())
{
auto
res_flag
=
activity_infos_
.
emplace
(
device_id
,
DeviceActivityInfos
());
AddKernelEventToDevice
(
event
,
&
res_flag
.
first
->
second
);
}
else
{
AddKernelEventToDevice
(
event
,
&
iter
->
second
);
}
}
void
DataSaver
::
AddKernelEventToDevice
(
const
Event
&
event
,
DeviceActivityInfos
*
device_activity_infos
)
{
// Combine kernel activity with same kernel name
auto
event_ptr
=
std
::
make_shared
<
Event
>
(
event
);
ActivityData
activity_data
=
ActivityData
(
event_ptr
);
std
::
string
kernel_name
=
event
.
kernel_name
;
auto
iter
=
device_activity_infos
->
find
(
kernel_name
);
if
(
iter
==
device_activity_infos
->
end
())
{
device_activity_infos
->
emplace
(
kernel_name
,
activity_data
);
}
else
{
iter
->
second
+=
activity_data
;
}
}
// Writes the parsed operator detail, operator type and activity tables as CSV files.
//   out_path_dir: directory the files are written into; nothing is written when it is empty.
// Nothing is written either when any of the three tables is empty.
void DataSaver::WriteFile(std::string out_path_dir) {
  if (out_path_dir.empty()) {
    MS_LOG(WARNING) << "Output directory is empty. Ignore the writing data.";
    return;
  }
  if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
    MS_LOG(WARNING) << "No operation detail infos to write.";
    return;
  }
  // not support multi-device for operator info per process yet:
  // the operator files are tagged with the id of the first device found in activity_infos_.
  device_id_ = std::to_string(activity_infos_.begin()->first);
  WriteOpDetail(out_path_dir);
  WriteOpType(out_path_dir);
  WriteActivity(out_path_dir);
}
// Writes op_type_infos_ to "<saver_base_dir>/gpu_op_type_info_<device_id_>.csv":
// one header line followed by one line per operator type.
// Logs a warning and writes nothing when the file cannot be opened.
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
  std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
  std::ofstream ofs(file_path);
  // check if the file is writable
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
    return;
  }
  // write op type info into file; '\n' instead of std::endl avoids flushing after every row,
  // the stream is flushed once by close().
  ofs << OpType().GetHeader() << '\n';
  // const reference: no copy of each <name, OpType> pair per row.
  for (const auto &op_type_info : op_type_infos_) {
    ofs << op_type_info.second << '\n';
  }
  ofs.close();
  MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
}
// Writes op_detail_infos_ to "<saver_base_dir>/gpu_op_detail_info_<device_id_>.csv":
// one header line followed by one line per operator.
// Logs a warning and writes nothing when the file cannot be opened.
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
  std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
  std::ofstream ofs(file_path);
  if (!ofs.is_open()) {
    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
    return;
  }
  // write op detail info into file; '\n' instead of std::endl avoids flushing after every row,
  // the stream is flushed once by close().
  ofs << OpDetailInfo().GetHeader() << '\n';
  // const reference: no copy of each OpDetailInfo (three strings and a shared_ptr) per row.
  for (const auto &op_detail : op_detail_infos_) {
    ofs << op_detail << '\n';
  }
  ofs.close();
  MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
}
// Writes one CSV file per device, "<saver_base_dir>/gpu_activity_data_<device_id>.csv":
// one header line followed by one line per kernel entry of that device.
// A device whose file cannot be opened is skipped with a warning; the remaining devices are
// still written.
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
  std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
  // const reference: a by-value loop variable would copy the whole per-device table.
  for (const auto &device_info : activity_infos_) {
    std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
    std::ofstream ofs(file_path);
    if (!ofs.is_open()) {
      MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
      // Keep going: a failure for one device must not drop the data of the others.
      continue;
    }
    // write activity data into file; '\n' instead of std::endl avoids flushing after every row,
    // the stream is flushed once by close().
    ofs << ActivityData().GetHeader() << '\n';
    for (const auto &activity_data : device_info.second) {
      ofs << activity_data.second << '\n';
    }
    ofs.close();
    MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
  }
}
}
// namespace gpu
}
// namespace profiler
}
// namespace mindspore
mindspore/ccsrc/profiler/device/gpu/data_saver.h
0 → 100644
浏览文件 @
4149274b
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_DATA_SAVER_H
#define MINDSPORE_DATA_SAVER_H
#include <iostream>
#include <unordered_map>
#include <vector>
#include <string>
#include <memory>
#include "profiler/device/gpu/gpu_profiling.h"
namespace
mindspore
{
namespace
profiler
{
namespace
gpu
{
// One CSV row of per-operator profiling detail.
struct OpDetailInfo {
  std::string op_type_;                       // part of op_name_ before its last '-'
  std::string op_name_;                       // part of op_full_name_ after its last '/'
  std::string op_full_name_;                  // copied from op_info_->op_name
  std::shared_ptr<OpInfo> op_info_{nullptr};  // raw record this row is derived from
  float op_avg_time_{0};                      // op_host_cost_time / op_count of op_info_
  float proportion_{0};                       // value handed to the two-argument constructor

  OpDetailInfo() = default;
  OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);

  // Column titles, in the same order in which operator<< emits the values.
  std::string GetHeader() const {
    return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
           "cuda_activity_cost_time(us),cuda_activity_call_count";
  }

  // Streams the row as comma-separated values without a trailing newline.
  // op_info_ is dereferenced, so a default-constructed object must not be streamed.
  friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
    os << "Device,";
    os << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',';
    os << event.op_info_->op_count << ',' << event.op_info_->op_host_cost_time << ',';
    os << event.op_avg_time_ << ',' << event.proportion_ << ',';
    os << event.op_info_->cupti_activity_time << ',' << event.op_info_->op_kernel_count;
    return os;
  }
};
// One CSV row of profiling statistics aggregated over all operators of the same type.
// The struct stays an aggregate: it is brace-initialised in member order elsewhere.
struct OpType {
  std::string op_type_;  // operator type name
  int count_{0};         // accumulated occurrences
  float total_time_{0};  // accumulated time
  float avg_time_{0};    // not touched by operator+=; set by the owner of the table
  float proportion_{0};  // accumulated proportion

  // Column titles, in the same order in which operator<< emits the values.
  std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }

  // Streams the row as comma-separated values without a trailing newline.
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    os << event.op_type_ << ',' << event.count_ << ',';
    os << event.total_time_ << ',' << event.proportion_ << ',';
    os << event.avg_time_;
    return os;
  }

  // Adds the counters of `other` to this row; op_type_ and avg_time_ are left unchanged.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
// One CSV row of statistics for the activity events merged under one entry.
struct ActivityData {
  std::shared_ptr<Event> basic_info_{nullptr};  // event supplied to the constructor
  std::string block_dim_;                       // filled by the constructor
  std::string grid_dim_;                        // filled by the constructor
  int count_{0};                                // number of merged events
  float total_duration_{0};
  float avg_duration_{0};
  float max_duration_{0};
  float min_duration_{0};

  ActivityData() = default;
  explicit ActivityData(std::shared_ptr<Event> data);

  // Column titles, in the same order in which operator<< emits the values.
  std::string GetHeader() const {
    return "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences,"
           "total_duration(us),avg_duration(us),max_duration(us),min_duration(us)";
  }

  // Streams the row as comma-separated values without a trailing newline; the kernel name is
  // wrapped in double quotes. basic_info_ is dereferenced, so a default-constructed object
  // must not be streamed.
  friend std::ostream &operator<<(std::ostream &os, const ActivityData &event) {
    os << "\"" << event.basic_info_->kernel_name << "\",";
    os << event.basic_info_->kernel_type << ',' << event.basic_info_->op_name << ',';
    os << event.basic_info_->stream_id << ',';
    os << event.block_dim_ << ',' << event.grid_dim_ << ',' << event.count_ << ',';
    os << event.total_duration_ << ',' << event.avg_duration_ << ',';
    os << event.max_duration_ << ',' << event.min_duration_;
    return os;
  }

  // Merges the statistics of `other` into this row (defined in the source file).
  ActivityData &operator+=(const ActivityData &other);
};
using
OpInfoMap
=
std
::
unordered_map
<
std
::
string
,
OpInfo
>
;
using
DeviceActivityInfos
=
std
::
unordered_map
<
std
::
string
,
ActivityData
>
;
// <kernel_name, ActivityData>
using
AllActivityInfos
=
std
::
unordered_map
<
uint32_t
,
DeviceActivityInfos
>
;
// <device_id, DeviceActivityInfos>
using
OpTypeInfos
=
std
::
unordered_map
<
std
::
string
,
OpType
>
;
// <op_type, OpType>
using
OpDetailInfos
=
std
::
vector
<
OpDetailInfo
>
;
// Turns raw GPU profiling records into per-operator, per-operator-type and per-kernel tables
// and writes them out as CSV files. Instances cannot be copied.
class DataSaver {
 public:
  DataSaver() = default;
  DataSaver(const DataSaver &) = delete;
  DataSaver &operator=(const DataSaver &) = delete;
  ~DataSaver() = default;

  // Fills the operator detail and operator type tables from the raw operator records.
  void ParseOpInfo(const OpInfoMap &op_info_maps);

  // Fills the activity table from the given events.
  void ParseEvent(const std::vector<Event> &events);

  // Writes all tables as CSV files into the directory `out_path`.
  void WriteFile(std::string out_path);

 private:
  // Helpers used while parsing.
  float GetTotalOpTime(const OpInfoMap &op_info_maps);
  void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
  void AddKernelEvent(const Event &event);
  void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos);

  // Helpers used while writing; each one writes below `saver_base_dir`.
  void WriteOpType(const std::string &saver_base_dir);
  void WriteOpDetail(const std::string &saver_base_dir);
  void WriteActivity(const std::string &saver_base_dir);

  std::string device_id_;            // device id used in the operator file names
  AllActivityInfos activity_infos_;  // activity statistics per device
  OpTypeInfos op_type_infos_;        // statistics per operator type
  OpDetailInfos op_detail_infos_;    // one entry per operator
};
}
// namespace gpu
}
// namespace profiler
}
// namespace mindspore
#endif // MINDSPORE_DATA_SAVER_H
mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
浏览文件 @
4149274b
...
...
@@ -19,6 +19,7 @@
#include <chrono>
#include "profiler/device/gpu/gpu_profiling.h"
#include "profiler/device/gpu/cupti_interface.h"
#include "profiler/device/gpu/data_saver.h"
#include "utils/log_adapter.h"
#include "pybind_api/api_register.h"
...
...
@@ -478,7 +479,11 @@ void GPUProfiler::Stop() {
void
GPUProfiler
::
SaveProfileData
()
{
if
(
profile_data_path_
.
empty
())
{
MS_LOG
(
WARNING
)
<<
"profile_data_path is empty, skip save profile data."
;
return
;
}
else
{
DataSaver
dataSaver
;
dataSaver
.
ParseOpInfo
(
op_info_map_
);
dataSaver
.
ParseEvent
(
events_
);
dataSaver
.
WriteFile
(
profile_data_path_
);
}
op_info_map_
.
clear
();
op_name_map_
.
clear
();
...
...
mindspore/profiler/parser/minddata_parser.py
浏览文件 @
4149274b
...
...
@@ -43,17 +43,21 @@ class MinddataParser:
node_name
,
node_start
,
node_end
,
queue_size
=
""
,
0
,
0
,
0
if
node_info
:
node_name
=
node_info
[
0
].
replace
(
"Node:"
,
""
)
if
len
(
node_info
)
>
2
:
if
len
(
node_info
)
>
3
and
"queue"
in
node_info
[
1
]:
queue_size
=
node_info
[
1
].
replace
(
"queue size:"
,
""
)
queue_size
=
int
(
queue_size
)
if
queue_size
.
isdigit
()
else
queue_size
node_start
=
node_info
[
2
].
replace
(
"Run start:"
,
""
)
node_start
=
int
(
node_start
)
if
node_start
.
isdigit
()
else
node_start
node_end
=
node_info
[
3
].
replace
(
"Run end:"
,
""
)
node_end
=
int
(
node_end
)
if
node_end
.
isdigit
()
else
node_end
elif
len
(
node_info
)
>
3
and
"Run"
in
node_info
[
1
]:
queue_size
=
node_info
[
3
].
replace
(
"queue size:"
,
""
)
queue_size
=
int
(
queue_size
)
if
queue_size
.
isdigit
()
else
queue_size
node_start
=
node_info
[
1
].
replace
(
"Run start:"
,
""
)
if
node_start
.
isdigit
():
node_start
=
int
(
node_start
)
node_start
=
int
(
node_start
)
if
node_start
.
isdigit
()
else
node_start
node_end
=
node_info
[
2
].
replace
(
"Run end:"
,
""
)
if
node_end
.
isdigit
():
node_end
=
int
(
node_end
)
if
len
(
node_info
)
>
3
:
queue_size
=
node_info
[
3
].
replace
(
"queue size:"
,
""
)
if
queue_size
.
isdigit
():
queue_size
=
int
(
queue_size
)
node_end
=
int
(
node_end
)
if
node_end
.
isdigit
()
else
node_end
one_step_list
=
[
node_name
,
node_start
,
node_end
,
queue_size
]
result
.
append
(
one_step_list
)
...
...
mindspore/profiler/profiling.py
浏览文件 @
4149274b
...
...
@@ -79,35 +79,42 @@ class Profiler:
optypes_to_deal
=
''
,
optypes_not_deal
=
'Variable'
,
job_id
=
""
):
# get device_id and device_target
self
.
_get_devid_and_devtarget
()
self
.
_container_path
=
os
.
path
.
join
(
self
.
_base_profiling_container_path
,
self
.
_dev_id
)
data_path
=
os
.
path
.
join
(
self
.
_container_path
,
"data"
)
if
not
os
.
path
.
exists
(
data_path
):
os
.
makedirs
(
data_path
,
exist_ok
=
True
)
self
.
_output_path
=
validate_and_normalize_path
(
output_path
)
self
.
_output_path
=
os
.
path
.
join
(
self
.
_output_path
,
"profiler"
)
if
not
os
.
path
.
exists
(
self
.
_output_path
):
os
.
makedirs
(
self
.
_output_path
,
exist_ok
=
True
)
os
.
environ
[
'PROFILING_MODE'
]
=
'true'
os
.
environ
[
'PROFILING_OPTIONS'
]
=
'training_trace:task_trace'
os
.
environ
[
'MINDDATA_PROFILING_DIR'
]
=
self
.
_output_path
os
.
environ
[
'DEVICE_ID'
]
=
self
.
_dev_id
os
.
environ
[
'AICPU_PROFILING_MODE'
]
=
'true'
os
.
environ
[
'PROFILING_DIR'
]
=
str
(
self
.
_container_path
)
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
context
.
set_context
(
enable_profiling
=
True
,
profiling_options
=
"training_trace:task_trace"
)
self
.
_subgraph
=
check_subgraph
(
subgraph
)
self
.
_valid_optype_name
=
optypes_to_deal
.
split
(
","
)
if
optypes_to_deal
else
[]
self
.
_filt_optype_names
=
optypes_not_deal
.
split
(
","
)
if
optypes_not_deal
else
[]
self
.
_detail
=
check_bool
(
is_detail
,
'is_detail'
)
self
.
_withfullpath
=
check_bool
(
is_show_op_path
,
'is_show_op_path'
)
self
.
_profiling_job_id
=
job_id
# add job id env through user input later
self
.
_job_id_env
=
0
self
.
_start_time
=
int
(
time
.
time
()
*
10000000
)
logger
.
info
(
"Profiling: profiling start time: %d"
,
self
.
_start_time
)
if
self
.
_device_target
and
self
.
_device_target
==
"GPU"
:
from
mindspore._c_expression
import
GPUProfiler
self
.
_gpu_profiler
=
GPUProfiler
.
get_instance
()
self
.
_gpu_profiler
.
init
(
self
.
_output_path
)
self
.
_gpu_profiler
.
step_profiling_enable
(
True
)
elif
self
.
_device_target
and
(
self
.
_device_target
==
"Ascend"
or
self
.
_device_target
!=
"Davinci"
):
self
.
_container_path
=
os
.
path
.
join
(
self
.
_base_profiling_container_path
,
self
.
_dev_id
)
data_path
=
os
.
path
.
join
(
self
.
_container_path
,
"data"
)
if
not
os
.
path
.
exists
(
data_path
):
os
.
makedirs
(
data_path
,
exist_ok
=
True
)
os
.
environ
[
'PROFILING_MODE'
]
=
'true'
os
.
environ
[
'PROFILING_OPTIONS'
]
=
'training_trace:task_trace'
os
.
environ
[
'MINDDATA_PROFILING_DIR'
]
=
self
.
_output_path
os
.
environ
[
'DEVICE_ID'
]
=
self
.
_dev_id
os
.
environ
[
'AICPU_PROFILING_MODE'
]
=
'true'
os
.
environ
[
'PROFILING_DIR'
]
=
str
(
self
.
_container_path
)
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
context
.
set_context
(
enable_profiling
=
True
,
profiling_options
=
"training_trace:task_trace"
)
self
.
_subgraph
=
check_subgraph
(
subgraph
)
self
.
_valid_optype_name
=
optypes_to_deal
.
split
(
","
)
if
optypes_to_deal
else
[]
self
.
_filt_optype_names
=
optypes_not_deal
.
split
(
","
)
if
optypes_not_deal
else
[]
self
.
_detail
=
check_bool
(
is_detail
,
'is_detail'
)
self
.
_withfullpath
=
check_bool
(
is_show_op_path
,
'is_show_op_path'
)
self
.
_profiling_job_id
=
job_id
# add job id env through user input later
self
.
_job_id_env
=
0
self
.
_start_time
=
int
(
time
.
time
()
*
10000000
)
logger
.
info
(
"Profiling: profiling start time: %d"
,
self
.
_start_time
)
def
analyse
(
self
):
"""
...
...
@@ -123,71 +130,74 @@ class Profiler:
>>> model.train()
>>> profiler.analyse()
"""
release
()
job_id
=
self
.
_get_profiling_job_id
()
logger
.
info
(
"Profiling: job id is %s "
,
job_id
)
source_path
=
os
.
path
.
join
(
PROFILING_LOG_BASE_PATH
,
job_id
)
# parse hwts.log.data.45.dev file, and get task profiling data
hwts_output_filename
=
self
.
_hwts_output_filename_target
+
self
.
_dev_id
+
".txt"
hwts_output_filename
=
os
.
path
.
join
(
self
.
_output_path
,
hwts_output_filename
)
hwtslog_parser
=
HWTSLogParser
(
source_path
,
hwts_output_filename
)
result
=
hwtslog_parser
.
execute
()
if
not
result
:
logger
.
error
(
"Profiling: fail to parse hwts log file."
)
return
# parse Framework file, and get the relation of op and tasks
framework_parser
=
FrameworkParser
(
job_id
,
self
.
_dev_id
,
self
.
_output_path
)
framework_parser
.
parse
()
op_task_dict
=
framework_parser
.
to_task_id_full_op_name_dict
()
if
not
op_task_dict
:
logger
.
error
(
"Profiling: fail to parse framework files."
)
return
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
opcompute_output_filename
=
self
.
_opcompute_output_filename_target
+
self
.
_dev_id
+
".txt"
opcompute_output_filename
=
os
.
path
.
join
(
self
.
_output_path
,
opcompute_output_filename
)
optime_parser
=
OPComputeTimeParser
(
hwts_output_filename
,
opcompute_output_filename
,
op_task_dict
,
self
.
_output_path
,
self
.
_dev_id
)
optime_parser
.
execute
()
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
output_data_preprocess_aicpu
=
self
.
_aicpu_op_output_filename_target
+
self
.
_dev_id
+
".txt"
output_data_preprocess_aicpu
=
os
.
path
.
join
(
self
.
_output_path
,
output_data_preprocess_aicpu
)
aicpu_data_parser
=
DataPreProcessParser
(
source_path
,
output_data_preprocess_aicpu
)
aicpu_data_parser
.
execute
()
# Parsing minddata AICPU profiling
MinddataParser
.
execute
(
source_path
,
self
.
_output_path
,
self
.
_dev_id
)
# parse minddata pipeline operator and queue
try
:
pipeline_parser
=
MinddataPipelineParser
(
self
.
_output_path
,
self
.
_dev_id
,
self
.
_output_path
)
pipeline_parser
.
parse
()
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse op compute time info
try
:
self
.
_analyser_op_info
()
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse step trace info
try
:
self
.
_analyse_step_trace
(
source_path
,
framework_parser
)
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse timeline info
try
:
self
.
_analyse_timeline
(
aicpu_data_parser
,
optime_parser
)
except
(
ProfilerIOException
,
ProfilerFileNotFoundException
,
RuntimeError
)
as
err
:
logger
.
warning
(
'Fail to write timeline data: %s'
,
err
)
if
self
.
_device_target
and
self
.
_device_target
==
"GPU"
:
self
.
_gpu_profiler
.
stop
()
elif
self
.
_device_target
and
(
self
.
_device_target
==
"Ascend"
or
self
.
_device_target
!=
"Davinci"
):
release
()
job_id
=
self
.
_get_profiling_job_id
()
logger
.
info
(
"Profiling: job id is %s "
,
job_id
)
source_path
=
os
.
path
.
join
(
PROFILING_LOG_BASE_PATH
,
job_id
)
# parse hwts.log.data.45.dev file, and get task profiling data
hwts_output_filename
=
self
.
_hwts_output_filename_target
+
self
.
_dev_id
+
".txt"
hwts_output_filename
=
os
.
path
.
join
(
self
.
_output_path
,
hwts_output_filename
)
hwtslog_parser
=
HWTSLogParser
(
source_path
,
hwts_output_filename
)
result
=
hwtslog_parser
.
execute
()
if
not
result
:
logger
.
error
(
"Profiling: fail to parse hwts log file."
)
return
# parse Framework file, and get the relation of op and tasks
framework_parser
=
FrameworkParser
(
job_id
,
self
.
_dev_id
,
self
.
_output_path
)
framework_parser
.
parse
()
op_task_dict
=
framework_parser
.
to_task_id_full_op_name_dict
()
if
not
op_task_dict
:
logger
.
error
(
"Profiling: fail to parse framework files."
)
return
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
opcompute_output_filename
=
self
.
_opcompute_output_filename_target
+
self
.
_dev_id
+
".txt"
opcompute_output_filename
=
os
.
path
.
join
(
self
.
_output_path
,
opcompute_output_filename
)
optime_parser
=
OPComputeTimeParser
(
hwts_output_filename
,
opcompute_output_filename
,
op_task_dict
,
self
.
_output_path
,
self
.
_dev_id
)
optime_parser
.
execute
()
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
output_data_preprocess_aicpu
=
self
.
_aicpu_op_output_filename_target
+
self
.
_dev_id
+
".txt"
output_data_preprocess_aicpu
=
os
.
path
.
join
(
self
.
_output_path
,
output_data_preprocess_aicpu
)
aicpu_data_parser
=
DataPreProcessParser
(
source_path
,
output_data_preprocess_aicpu
)
aicpu_data_parser
.
execute
()
# Parsing minddata AICPU profiling
MinddataParser
.
execute
(
source_path
,
self
.
_output_path
,
self
.
_dev_id
)
# parse minddata pipeline operator and queue
try
:
pipeline_parser
=
MinddataPipelineParser
(
self
.
_output_path
,
self
.
_dev_id
,
self
.
_output_path
)
pipeline_parser
.
parse
()
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse op compute time info
try
:
self
.
_analyser_op_info
()
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse step trace info
try
:
self
.
_analyse_step_trace
(
source_path
,
framework_parser
)
except
ProfilerException
as
err
:
logger
.
warning
(
err
.
message
)
# analyse timeline info
try
:
self
.
_analyse_timeline
(
aicpu_data_parser
,
optime_parser
)
except
(
ProfilerIOException
,
ProfilerFileNotFoundException
,
RuntimeError
)
as
err
:
logger
.
warning
(
'Fail to write timeline data: %s'
,
err
)
def
_analyse_step_trace
(
self
,
source_path
,
framework_parser
):
"""
...
...
@@ -416,12 +426,12 @@ class Profiler:
dev_id
=
"0"
logger
.
error
(
"Fail to get DEVICE_ID, use 0 instead."
)
if
device_target
and
device_target
!=
"Davinci"
\
and
device_target
!=
"Ascend"
:
if
device_target
and
device_target
not
in
[
"Davinci"
,
"Ascend"
,
"GPU"
]:
msg
=
"Profiling: unsupport backend: %s"
%
device_target
raise
RuntimeError
(
msg
)
self
.
_dev_id
=
dev_id
self
.
_device_target
=
device_target
@
staticmethod
def
trainable_parameters
(
network
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录