提交 582463f7 · 作者: mindspore-ci-bot · 提交者: Gitee

!480 fix dsmi_get_device_utilization_rate

Merge pull request !480 from LiHongzhang/fix_unpack
...@@ -270,8 +270,7 @@ def dsmi_get_hbm_info(device_id): ...@@ -270,8 +270,7 @@ def dsmi_get_hbm_info(device_id):
} }
@_timeout(0.2, 0) @_timeout(0.2, -1)
@_fallback_to_prev_result
def dsmi_get_device_utilization_rate(device_id, device_type): def dsmi_get_device_utilization_rate(device_id, device_type):
""" """
Get device utilization rate, %. Get device utilization rate, %.
...@@ -282,13 +281,14 @@ def dsmi_get_device_utilization_rate(device_id, device_type): ...@@ -282,13 +281,14 @@ def dsmi_get_device_utilization_rate(device_id, device_type):
device_id (int): The specific device id device_id (int): The specific device id
device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth. device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth.
Returns: Returns:
int, the utilization rate. int, the utilization rate, returning -1 to indicate querying failed.
""" """
device_id = c_int(device_id) device_id = c_int(device_id)
device_type = c_int(device_type) device_type = c_int(device_type)
utilization_rate = c_uint() utilization_rate = c_uint()
success = _libsmicall(device_id, device_type, byref(utilization_rate)) if _libsmicall(device_id, device_type, byref(utilization_rate)):
return success, utilization_rate.value return utilization_rate.value
return -1
@_fallback_to_prev_result @_fallback_to_prev_result
...@@ -388,14 +388,14 @@ def _collect_one(device_id): ...@@ -388,14 +388,14 @@ def _collect_one(device_id):
Raises: Raises:
RuntimeError, when querying dsmi returning non-zero. RuntimeError, when querying dsmi returning non-zero.
""" """
kb_to_mb, memory_threshold, success = 1024, 4, [True] * 7 kb_to_mb, memory_threshold, success = 1024, 4, [True] * 6
success[0], health = dsmi_get_device_health(device_id) success[0], health = dsmi_get_device_health(device_id)
success[1], hbm_info = dsmi_get_hbm_info(device_id) success[1], hbm_info = dsmi_get_hbm_info(device_id)
success[2], chip_info = dsmi_get_chip_info(device_id) success[2], chip_info = dsmi_get_chip_info(device_id)
success[3], ip_addr = dsmi_get_device_ip_address(device_id) success[3], ip_addr = dsmi_get_device_ip_address(device_id)
success[4], aicore_rate = dsmi_get_device_utilization_rate(device_id, 2) success[4], power_info = dsmi_get_device_power_info(device_id)
success[5], power_info = dsmi_get_device_power_info(device_id) success[5], temperature = dsmi_get_device_temperature(device_id)
success[6], temperature = dsmi_get_device_temperature(device_id) aicore_rate = dsmi_get_device_utilization_rate(device_id, 2)
return { return {
'chip_name': chip_info.get('chip_name'), 'chip_name': chip_info.get('chip_name'),
'device_id': device_id, 'device_id': device_id,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册