diff --git a/mindinsight/backend/datavisual/__init__.py b/mindinsight/backend/datavisual/__init__.py index 63039b4d296b6e4ba3b162848340350cd8cdbbd7..278f18059725a994178ee02304c270a98ed1b1ca 100644 --- a/mindinsight/backend/datavisual/__init__.py +++ b/mindinsight/backend/datavisual/__init__.py @@ -17,7 +17,6 @@ from mindinsight.backend.datavisual.static_resource_api import init_module as static_init_module from mindinsight.backend.datavisual.task_manager_api import init_module as task_init_module from mindinsight.backend.datavisual.train_visual_api import init_module as train_init_module -from mindinsight.backend.datavisual.sysmetric_api import init_module as sysmetric_init_module def init_module(app): @@ -31,4 +30,3 @@ def init_module(app): static_init_module(app) task_init_module(app) train_init_module(app) - sysmetric_init_module(app) diff --git a/mindinsight/backend/datavisual/sysmetric_api.py b/mindinsight/backend/datavisual/sysmetric_api.py deleted file mode 100644 index be66f3760b69820bffbdd178215ca7182a6ec5ce..0000000000000000000000000000000000000000 --- a/mindinsight/backend/datavisual/sysmetric_api.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""System metrics API.""" - -from flask import Blueprint, jsonify -from mindinsight.conf import settings -from mindinsight.sysmetric.collector import get_metrics - -BLUEPRINT = Blueprint("sysmetric", __name__, url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX) - - -@BLUEPRINT.route("/sysmetric/current", methods=["GET"]) -def query_sysmetric(): - """Query the system metrics.""" - - return jsonify(get_metrics()) - - -def init_module(app): - """ - Init module entry. - - Args: - app: the application obj. - - """ - app.register_blueprint(BLUEPRINT) diff --git a/mindinsight/sysmetric/__init__.py b/mindinsight/sysmetric/__init__.py deleted file mode 100644 index e30774307ca2107b3a81c071ad33c042ef924790..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ diff --git a/mindinsight/sysmetric/collector/__init__.py b/mindinsight/sysmetric/collector/__init__.py deleted file mode 100644 index 80bd80ca0590c45dd041a0dcc18e743313188628..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/collector/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""The metrics collector.""" -from ._collect_cpu import collect_cpu -from ._collect_mem import collect_mem -from ._collect_npu import collect_npu - -__all__ = ['collect_cpu', 'collect_mem', 'collect_npu', 'get_metrics'] - - -def get_metrics(): - mem = collect_mem() - mem_total = mem.get('total') - mem_available = mem.get('available') - mem_used = mem.get('used') - return { - 'npu': collect_npu(), - 'cpu': { - 'overall': collect_cpu(percent=True), - 'percpu': collect_cpu(percpu=True, percent=True) - }, - 'memory': { - 'virtual': { - 'available': mem_available, - 'used': mem_used, - 'others': max(mem_total - mem_available - mem_used, 0) - } - } - } diff --git a/mindinsight/sysmetric/collector/_collect_cpu.py b/mindinsight/sysmetric/collector/_collect_cpu.py deleted file mode 100644 index 4ce5365b79618a12218eac099759385dd7be1823..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/collector/_collect_cpu.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""The cpu collector.""" - -import psutil - - -def collect_cpu(percpu=False, percent=False): - """ - Collect the cpu info. - - Args: - percpu (bool): To return a list of cpu info for each logical CPU on the system. - percent (bool): Represent the sized in percentage. - - Returns: - Union[dict, List[dict]], the CPUs info. - """ - if percent: - times = psutil.cpu_times_percent(percpu=percpu) - else: - times = psutil.cpu_times(percpu=percpu) - if not percpu: - return dict(times._asdict()) - return [dict(time._asdict()) for time in times] diff --git a/mindinsight/sysmetric/collector/_collect_mem.py b/mindinsight/sysmetric/collector/_collect_mem.py deleted file mode 100644 index 6810787f05905832291ef5cc959a64f5bbbf012c..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/collector/_collect_mem.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""The memory collector.""" - -import psutil - - -def collect_mem(): - """ - Collect the virtual memory info. - - Returns: - dict, the virtual memory info. - """ - return dict(psutil.virtual_memory()._asdict()) diff --git a/mindinsight/sysmetric/collector/_collect_npu.py b/mindinsight/sysmetric/collector/_collect_npu.py deleted file mode 100644 index 7bdbbbec3804a4fbcdb1fa7e7e760a79dd8a5a30..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/collector/_collect_npu.py +++ /dev/null @@ -1,359 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""The npu collector.""" - -import inspect -from ctypes import CDLL, Structure, byref, c_char, c_int, c_uint, c_ulong, c_ushort -from functools import lru_cache, wraps - -from mindinsight.sysmetric.common.exceptions import DsmiQueryingException -from mindinsight.sysmetric.common.log import logger - - -def _fallback_to_prev_result(fn): - """Fallback to previous successful result when failing.""" - prev_result = None - - @wraps(fn) - def wrap(*args): - nonlocal prev_result - sucess, result = fn(*args) - if sucess: - prev_result = result - return sucess, result - if prev_result is not None: - return sucess, prev_result - raise RuntimeError(f'{fn.__name__} querying failed and no previous successful result.') - - return wrap - - -def _libsmicall(*args): - """ - Call the lib function to querying NPU metrics. - - Returns: - bool, True when success of querying, False otherwise. - """ - if not libsmi: - logger.error('Trying to call the libdrvdsmi_host which is not loaded.') - raise ValueError('Trying to call the libdrvdsmi_host which is not loaded.') - fname = inspect.stack()[1].function - error_code = getattr(libsmi, fname)(*args) - if error_code != 0: - logger.error('%s querying failed with error code %d.', fname, error_code) - return error_code == 0 - - -@lru_cache(maxsize=4) -def dsmi_get_device_count(): - """ - Get device count. - - Returns: - int, the device count. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - device_count = c_int() - - if _libsmicall(byref(device_count)): - return device_count.value - raise RuntimeError('Querying device count failed.') - - -@lru_cache(maxsize=4) -def dsmi_list_device(count): - """ - List the device IDs. - - Args: - count (int): The device count. - - Returns: - List[int], the device IDs. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - device_id_array = c_int * count - device_id_list = device_id_array() - count = c_int(count) - - if _libsmicall(device_id_list, count): - return list(device_id_list) - raise RuntimeError('Querying device id list failed.') - - -@lru_cache(maxsize=8) -@_fallback_to_prev_result -def dsmi_get_chip_info(device_id): - """ - Get chip info. - - Args: - device_id (int): The specific device id. - - Returns: - dict, the chip info: - - chip_type (str): The chip type. - - chip_name (str): The chip name. - - chip_ver (str): The chip name. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - - class ChipInfoStruct(Structure): - _fields_ = [('chip_type', c_char * 32), ('chip_name', c_char * 32), ('chip_ver', c_char * 32)] - - device_id = c_int(device_id) - chip_info = ChipInfoStruct() - success = _libsmicall(device_id, byref(chip_info)) - return success, { - 'chip_type': chip_info.chip_type.decode('utf-8'), - 'chip_name': chip_info.chip_name.decode('utf-8'), - 'chip_ver': chip_info.chip_ver.decode('utf-8') - } - - -@_fallback_to_prev_result -def dsmi_get_device_health(device_id): - """ - Get device health. - - Args: - device_id (int): The specific device id. - - Returns: - int, 0 indicats normal, 1 minor alarm, 2 major alarm, 3 critical alarm, 0xffffffff device not found. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - device_id = c_int(device_id) - health = c_uint() - - success = _libsmicall(device_id, byref(health)) - - return success, health.value - - -@lru_cache(maxsize=8) -@_fallback_to_prev_result -def dsmi_get_device_ip_address(device_id): - """ - Get device IP address. - - Args: - device_id (int): The specific device ID. - Returns: - dict, the device IP address: - - ip_address (str): the IP address. - - mask_address (str): the mask address. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - is_ipv6, port_type, port_id = False, 1, 0 - - class Ipaddrstruct(Structure): - _fields_ = [('u_addr', c_char * (16 if is_ipv6 else 4)), ('ip_type', c_int)] - - ip_type = c_int(1 if is_ipv6 else 0) - - device_id = c_int(device_id) - ip_address = Ipaddrstruct(b'', ip_type) - mask_address = Ipaddrstruct(b'', ip_type) - - success = _libsmicall(device_id, port_type, port_id, byref(ip_address), byref(mask_address)) - - def pad(u_addr): - for i in range(4): - if i < len(u_addr): - yield u_addr[i] - else: - yield 0 - - return success, { - 'ip_address': '.'.join(str(c) for c in pad(ip_address.u_addr)), - 'mask_address': '.'.join(str(c) for c in pad(mask_address.u_addr)) - } - - -@_fallback_to_prev_result -def dsmi_get_hbm_info(device_id): - """ - Get the HBM info. - - Args: - device_id (int): The specific device id. - - Returns: - dict, the HBM info: - memory_size (int), The total HBM memory, in KB. - frep (int), The HBM frequency, in MHZ. - memory_usage (int), The used HBM memory, in KB. - temp (int), The HBM temperature, in °C. - bandwith_util_rate (int): The bandwith util rate, in %. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - - class HbmInfoStruct(Structure): - _fields_ = [('memory_size', c_ulong), ('freq', c_uint), ('memory_usage', c_ulong), ('temp', c_int), - ('bandwith_util_rate', c_uint)] - - device_id = c_int(device_id) - hbm_info = HbmInfoStruct() - - success = _libsmicall(device_id, byref(hbm_info)) - - return success, { - 'memory_size': hbm_info.memory_size, - 'freq': hbm_info.freq, - 'memory_usage': hbm_info.memory_usage, - 'temp': hbm_info.temp, - 'bandwith_util_rate': hbm_info.bandwith_util_rate - } - - -@_fallback_to_prev_result -def dsmi_get_device_power_info(device_id): - """ - Get the device power. - - Args: - device_id (int): The specific device id. - - Returns: - dict, the device power info. - - power, the device power, in Watt. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - - class PowerInfoStruct(Structure): - _fields_ = [('power', c_ushort)] - - power_info = PowerInfoStruct() - device_id = c_int(device_id) - - success = _libsmicall(device_id, byref(power_info)) - return success, {'power': round(power_info.power * 0.1, 2)} - - -@_fallback_to_prev_result -def dsmi_get_device_temperature(device_id): - """ - Get the device temperature. - - Args: - device_id (int): The specific device id. - - Returns: - int, the device temperature, in °C. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - device_id = c_int(device_id) - temperature = c_uint() - - success = _libsmicall(device_id, byref(temperature)) - - return success, temperature.value - - -def collect_npu(): - """Collect the metrics for each NPUs. - - Returns: - List[dict], the metrics of each NPUs. - - Raises: - DsmiQueryingException, when querying dsmi returning non-zero. - """ - try: - return _collect_npus() - except RuntimeError as e: - logger.warning(e.args[0]) - raise DsmiQueryingException(e.args[0]) - - -def _collect_npus(): - """Collect the metrics for each NPUs. - - Returns: - List[dict], the metrics of each NPUs. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - if not libsmi: - return None - count = dsmi_get_device_count() - device_ids = dsmi_list_device(count) - npus = [] - for device_id in device_ids: - npu = _collect_one(device_id) - npus.append(npu) - return npus - - -def _collect_one(device_id): - """ - Collect NPU info by the device_id. - - Args: - device_id (int): The specific device id. - - Returns: - dict, the NPU info. - - Raises: - RuntimeError, when querying dsmi returning non-zero. - """ - kb_to_mb, memory_threshold, success = 1024, 4, [True] * 6 - success[0], health = dsmi_get_device_health(device_id) - success[1], hbm_info = dsmi_get_hbm_info(device_id) - success[2], chip_info = dsmi_get_chip_info(device_id) - success[3], ip_addr = dsmi_get_device_ip_address(device_id) - success[4], power_info = dsmi_get_device_power_info(device_id) - success[5], temperature = dsmi_get_device_temperature(device_id) - return { - 'chip_name': chip_info.get('chip_name'), - 'device_id': device_id, - 'available': all(success) and health == 0 and hbm_info.get('memory_usage', 0) // kb_to_mb < memory_threshold, - 'health': health, - 'ip_address': ip_addr.get('ip_address'), - 'hbm_info': { - 'memory_size': hbm_info.get('memory_size') // kb_to_mb, - 'memory_usage': hbm_info.get('memory_usage') // kb_to_mb - }, - 'power': power_info.get('power'), - 'temperature': temperature, - 'success': all(success) - } - - -try: - libsmi = CDLL('libdrvdsmi_host.so') -except OSError: - logger.info('Failed to load libdrvdsmi_host.so.') - libsmi = None diff --git a/mindinsight/sysmetric/common/__init__.py b/mindinsight/sysmetric/common/__init__.py deleted file mode 100644 index e30774307ca2107b3a81c071ad33c042ef924790..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/common/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ diff --git a/mindinsight/sysmetric/common/exceptions.py b/mindinsight/sysmetric/common/exceptions.py deleted file mode 100644 index 9dbcfff9f6023943a6ff029472b2b0dbf1012f7b..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/common/exceptions.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Define custom exception.""" - -from mindinsight.utils.exceptions import MindInsightException -from mindinsight.utils.constant import SysmetricErrors - - -class DsmiQueryingException(MindInsightException): - """Dsmi Querying Failure""" - - def __init__(self, message): - super(DsmiQueryingException, self).__init__(SysmetricErrors.DSMI_QUERYING_NONZERO, message) diff --git a/mindinsight/sysmetric/common/log.py b/mindinsight/sysmetric/common/log.py deleted file mode 100644 index e4e35e28f0ea0186ddcaea62f907f15afef131e2..0000000000000000000000000000000000000000 --- a/mindinsight/sysmetric/common/log.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Resource logger.""" -from mindinsight.utils.log import setup_logger - -logger = setup_logger(sub_module='sysmetric', log_name='sysmetric') diff --git a/mindinsight/utils/constant.py b/mindinsight/utils/constant.py index d96451325bf19f8db6a082bed6666c38e9280e2d..f9126bf7f5702b05b965fe30a4c2fdf1e6d7314d 100644 --- a/mindinsight/utils/constant.py +++ b/mindinsight/utils/constant.py @@ -31,7 +31,6 @@ class MindInsightModules(Enum): DATAVISUAL = 5 PROFILERMGR = 6 SCRIPTCONVERTER = 7 - SYSMETRIC = 8 WIZARD = 9 @@ -82,10 +81,5 @@ class DataVisualErrors(Enum): class ScriptConverterErrors(Enum): """Enum definition for mindconverter errors.""" -class SysmetricErrors(Enum): - """Enum definition for sysmetric errors.""" - DSMI_QUERYING_NONZERO = 1 - - class WizardErrors(Enum): """Enum definition for mindwizard errors.""" diff --git a/tests/ut/sysmetric/__init__.py b/tests/ut/sysmetric/__init__.py deleted file mode 100644 index 8783813b1bb37d36d1c2320f20e8919d6a1adf02..0000000000000000000000000000000000000000 --- a/tests/ut/sysmetric/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Test the system metrics.""" diff --git a/tests/ut/sysmetric/test_metrics_collector.py b/tests/ut/sysmetric/test_metrics_collector.py deleted file mode 100644 index 3f28d92646b0968840a029125bb0cca7b06ae1e6..0000000000000000000000000000000000000000 --- a/tests/ut/sysmetric/test_metrics_collector.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""Test the metrics collector.""" -from os import cpu_count - -from mindinsight.sysmetric.collector import collect_cpu, collect_mem, collect_npu - - -def test_collect_cpu(): - overall = collect_cpu(percent=True) - assert isinstance(overall, dict) - for value in overall.values(): - assert 0 <= value <= 100 - for key in 'user', 'system', 'idle': - assert key in overall - cores = collect_cpu(percpu=True) - assert isinstance(cores, list) and len(cores) == cpu_count() - - -def test_collect_mem(): - mem = collect_mem() - assert 'total' in mem - assert 'available' in mem - assert mem['total'] > mem['available'] - - -def test_collect_npu(): - npu = collect_npu() - if npu is not None: - assert len(npu) == 8