diff --git a/mindinsight/sysmetric/collector/_collect_npu.py b/mindinsight/sysmetric/collector/_collect_npu.py index 9f34c8a4161eee7badde7db78a85a9e885518dc4..62071f318261d6c5955d070d658562f3ee21ea9d 100644 --- a/mindinsight/sysmetric/collector/_collect_npu.py +++ b/mindinsight/sysmetric/collector/_collect_npu.py @@ -16,15 +16,37 @@ import inspect from ctypes import CDLL, Structure, byref, c_char, c_int, c_uint, c_ulong, c_ushort -from functools import lru_cache +from functools import lru_cache, wraps +from threading import Thread from mindinsight.sysmetric.common.log import logger -try: - libsmi = CDLL('libdrvdsmi_host.so') -except OSError: - logger.info('Failed to load libdrvdsmi_host.so.') - libsmi = None + +def _timeout(seconds, default): + """ + The timeout decorator wait for specified seconds or return the default value. + + Args: + seconds (float): The specified seconds. + default (Any): The default value. + """ + + def outer(fn): + + def target(*args, **kwargs): + nonlocal default + default = fn(*args, **kwargs) + + @wraps(fn) + def inner(*args, **kwargs): + thread = Thread(target=target, args=args, kwargs=kwargs) + thread.start() + thread.join(seconds) + return default + + return inner + + return outer def libsmicall(*args, **kwargs): @@ -188,6 +210,7 @@ def dsmi_get_hbm_info(device_id): } +@_timeout(0.2, 0) def dsmi_get_device_utilization_rate(device_id, device_type): """ Get device utilization rate, %. @@ -279,3 +302,11 @@ def collect_npu(): 'temperature': dsmi_get_device_temperature(device_id) }) return npus + + +try: + libsmi = CDLL('libdrvdsmi_host.so') + Thread(target=collect_npu).start() +except OSError: + logger.info('Failed to load libdrvdsmi_host.so.') + libsmi = None