Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindinsight
提交
b95122b1
M
mindinsight
项目概览
MindSpore
/
mindinsight
通知
8
Star
3
Fork
2
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindinsight
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b95122b1
编写于
8月 22, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 22, 2020
浏览文件
操作
浏览文件
下载
差异文件
!561 Remove sysmetric AI Core querying
Merge pull request !561 from LiHongzhang/rm_ai_core
上级
ad454e27
cb869c8b
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
14 additions
and
61 deletions
+14
-61
mindinsight/sysmetric/__init__.py
mindinsight/sysmetric/__init__.py
+14
-0
mindinsight/sysmetric/collector/_collect_npu.py
mindinsight/sysmetric/collector/_collect_npu.py
+0
-61
未找到文件。
mindinsight/sysmetric/__init__.py
0 → 100644
浏览文件 @
b95122b1
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
mindinsight/sysmetric/collector/_collect_npu.py
浏览文件 @
b95122b1
...
...
@@ -15,51 +15,13 @@
"""The npu collector."""
import
inspect
from
collections
import
defaultdict
from
ctypes
import
CDLL
,
Structure
,
byref
,
c_char
,
c_int
,
c_uint
,
c_ulong
,
c_ushort
from
functools
import
lru_cache
,
wraps
from
threading
import
Lock
,
Thread
from
mindinsight.sysmetric.common.exceptions
import
DsmiQueryingException
from
mindinsight.sysmetric.common.log
import
logger
def
_timeout
(
seconds
,
default
):
"""
The timeout decorator wait for specified seconds or return the default value.
Args:
seconds (float): The specified seconds.
default (Any): The default value.
"""
def
outer
(
fn
):
cached
,
lockdict
=
{},
defaultdict
(
Lock
)
def
target
(
*
args
):
lock
=
lockdict
[
args
]
if
lock
.
acquire
(
blocking
=
False
):
try
:
cached
[
args
]
=
fn
(
*
args
)
finally
:
lock
.
release
()
else
:
logger
.
debug
(
'%s%r skipped.'
,
fn
.
__name__
,
args
)
@
wraps
(
fn
)
def
inner
(
*
args
):
thread
=
Thread
(
target
=
target
,
args
=
args
,
daemon
=
True
)
thread
.
start
()
thread
.
join
(
seconds
)
if
thread
.
is_alive
():
logger
.
debug
(
'%s%r timeouted.'
,
fn
.
__name__
,
args
)
return
cached
.
get
(
args
,
default
)
return
inner
return
outer
def
_fallback_to_prev_result
(
fn
):
"""Fallback to previous successful result when failing."""
prev_result
=
None
...
...
@@ -270,27 +232,6 @@ def dsmi_get_hbm_info(device_id):
}
@_timeout(0.2, -1)
def dsmi_get_device_utilization_rate(device_id, device_type):
    """
    Query the utilization rate, in percent, of one component of a device.

    Note: Querying AI Core while profiling is turned on will return failure.

    Args:
        device_id (int): The specific device id.
        device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth.

    Returns:
        int, the utilization rate, or -1 to indicate that querying failed.
    """
    rate = c_uint()
    # The rate is passed by reference and read back only when the call reports success.
    succeeded = _libsmicall(c_int(device_id), c_int(device_type), byref(rate))
    return rate.value if succeeded else -1
@
_fallback_to_prev_result
def
dsmi_get_device_power_info
(
device_id
):
"""
...
...
@@ -395,14 +336,12 @@ def _collect_one(device_id):
success
[
3
],
ip_addr
=
dsmi_get_device_ip_address
(
device_id
)
success
[
4
],
power_info
=
dsmi_get_device_power_info
(
device_id
)
success
[
5
],
temperature
=
dsmi_get_device_temperature
(
device_id
)
aicore_rate
=
dsmi_get_device_utilization_rate
(
device_id
,
2
)
return
{
'chip_name'
:
chip_info
.
get
(
'chip_name'
),
'device_id'
:
device_id
,
'available'
:
all
(
success
)
and
health
==
0
and
hbm_info
.
get
(
'memory_usage'
,
0
)
//
kb_to_mb
<
memory_threshold
,
'health'
:
health
,
'ip_address'
:
ip_addr
.
get
(
'ip_address'
),
'aicore_rate'
:
aicore_rate
,
'hbm_info'
:
{
'memory_size'
:
hbm_info
.
get
(
'memory_size'
)
//
kb_to_mb
,
'memory_usage'
:
hbm_info
.
get
(
'memory_usage'
)
//
kb_to_mb
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录