From 70116923ba05bb9ba1d5cfd9ef5072ceac465c47 Mon Sep 17 00:00:00 2001 From: Li Hongzhang Date: Mon, 27 Jul 2020 15:03:06 +0800 Subject: [PATCH] fix dsmi_get_device_utilization_rate --- .../sysmetric/collector/_collect_npu.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mindinsight/sysmetric/collector/_collect_npu.py b/mindinsight/sysmetric/collector/_collect_npu.py index 8329ada..c5ede85 100644 --- a/mindinsight/sysmetric/collector/_collect_npu.py +++ b/mindinsight/sysmetric/collector/_collect_npu.py @@ -270,8 +270,7 @@ def dsmi_get_hbm_info(device_id): } -@_timeout(0.2, 0) -@_fallback_to_prev_result +@_timeout(0.2, -1) def dsmi_get_device_utilization_rate(device_id, device_type): """ Get device utilization rate, %. @@ -282,13 +281,14 @@ def dsmi_get_device_utilization_rate(device_id, device_type): device_id (int): The specific device id device_type (int): The device type, 1 for memory, 2 AI Core, 5 memory bandwidth, 6 HBM, 10 HBM bandwidth. Returns: - int, the utilization rate. + int, the utilization rate, returning -1 to indicate querying failed. """ device_id = c_int(device_id) device_type = c_int(device_type) utilization_rate = c_uint() - success = _libsmicall(device_id, device_type, byref(utilization_rate)) - return success, utilization_rate.value + if _libsmicall(device_id, device_type, byref(utilization_rate)): + return utilization_rate.value + return -1 @_fallback_to_prev_result @@ -388,14 +388,14 @@ def _collect_one(device_id): Raises: RuntimeError, when querying dsmi returning non-zero. """ - kb_to_mb, memory_threshold, success = 1024, 4, [True] * 7 + kb_to_mb, memory_threshold, success = 1024, 4, [True] * 6 success[0], health = dsmi_get_device_health(device_id) success[1], hbm_info = dsmi_get_hbm_info(device_id) success[2], chip_info = dsmi_get_chip_info(device_id) success[3], ip_addr = dsmi_get_device_ip_address(device_id) - success[4], aicore_rate = dsmi_get_device_utilization_rate(device_id, 2) - success[5], power_info = dsmi_get_device_power_info(device_id) - success[6], temperature = dsmi_get_device_temperature(device_id) + success[4], power_info = dsmi_get_device_power_info(device_id) + success[5], temperature = dsmi_get_device_temperature(device_id) + aicore_rate = dsmi_get_device_utilization_rate(device_id, 2) return { 'chip_name': chip_info.get('chip_name'), 'device_id': device_id, -- GitLab