Commit 7e17d6ff authored by: W wenkai

refactor data manager and unify cache and data access/reload

Parent 46d44977
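In short: DataManager is now constructed from the summary base directory itself rather than from a list of DataLoaderGenerator instances, and brief-cache item updaters such as LineageCacheItemUpdater are registered on it before loading starts. A minimal sketch of the new call sequence, assembled only from calls visible in this diff (the directory path is a placeholder):

from mindinsight.conf import settings
from mindinsight.datavisual.data_transform.data_manager import DataManager
from mindinsight.lineagemgr.cache_item_updater import LineageCacheItemUpdater

# Previously: DataManager([DataLoaderGenerator(summary_base_dir)])
data_manager = DataManager("/path/to/summary_base_dir")
data_manager.register_brief_cache_item_updater(LineageCacheItemUpdater())
data_manager.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL),
                             max_threads_count=int(settings.MAX_THREADS_COUNT))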
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Trigger data manager load."""
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER
from mindinsight.datavisual.common.log import logger
from mindinsight.conf import settings
from mindinsight.lineagemgr.cache_item_updater import LineageCacheItemUpdater
def init_module(app):
    """
    Interface to init module.

    Args:
        app (Flask): An instance of Flask.
    """
    # Just to suppress pylint warning about unused arg.
    logger.debug("App: %s", type(app))
    DATA_MANAGER.register_brief_cache_item_updater(LineageCacheItemUpdater())
    DATA_MANAGER.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL),
                                 max_threads_count=int(settings.MAX_THREADS_COUNT))
......@@ -18,9 +18,6 @@ from mindinsight.backend.datavisual.static_resource_api import init_module as st
from mindinsight.backend.datavisual.task_manager_api import init_module as task_init_module
from mindinsight.backend.datavisual.train_visual_api import init_module as train_init_module
from mindinsight.conf import settings
from mindinsight.datavisual.data_transform.data_manager import DATA_MANAGER
def init_module(app):
    """
......@@ -33,6 +30,3 @@ def init_module(app):
    static_init_module(app)
    task_init_module(app)
    train_init_module(app)

    DATA_MANAGER.start_load_data(reload_interval=int(settings.RELOAD_INTERVAL),
                                 max_threads_count=int(settings.MAX_THREADS_COUNT))
......@@ -150,3 +150,12 @@ class HistogramNotExistError(MindInsightException):
        super(HistogramNotExistError, self).__init__(DataVisualErrors.HISTOGRAM_NOT_EXIST,
                                                     error_msg,
                                                     http_code=400)


class TrainJobDetailNotInCacheError(MindInsightException):
    """Detail info of the given train job is not in cache."""

    def __init__(self, error_detail="no detail provided."):
        error_msg = f'Detail info of the given train job is not in cache. Detail: {error_detail}'
        super().__init__(DataVisualErrors.TRAIN_JOB_DETAIL_NOT_IN_CACHE,
                         error_msg,
                         http_code=400)
......@@ -18,6 +18,7 @@ from mindinsight.datavisual.common import exceptions
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.validation import Validation
from mindinsight.datavisual.processors.base_processor import BaseProcessor
from mindinsight.datavisual.data_transform.data_manager import DATAVISUAL_PLUGIN_KEY, DATAVISUAL_CACHE_KEY
class TrainTaskManager(BaseProcessor):
......@@ -53,13 +54,24 @@ class TrainTaskManager(BaseProcessor):
            dict, refer to restful api.
        """
        Validation.check_param_empty(train_id=train_id)
        train_job = self._data_manager.get_single_train_job(train_id, manual_update=manual_update)
        if not train_job:
        if manual_update:
            self._data_manager.cache_train_job(train_id)
        train_job = self._data_manager.get_train_job(train_id)
        try:
            data_visual_content = train_job.get_detail(DATAVISUAL_CACHE_KEY)
            plugins = data_visual_content.get(DATAVISUAL_PLUGIN_KEY)
        except exceptions.TrainJobDetailNotInCacheError:
            plugins = []
        if not plugins:
            default_result = dict()
            for plugin_name in PluginNameEnum.list_members():
                default_result.update({plugin_name: list()})
            return dict(plugins=default_result)
        return dict(
            plugins=train_job['tag_mapping']
            plugins=plugins
        )
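With this change, querying plugins for a train job whose detail info is not yet cached falls back to an empty tag list per plugin instead of raising. A sketch of that default response, assuming the plugin names that PluginNameEnum exposes elsewhere in this change set (graph, image, scalar, histogram):

# Hypothetical shape of the fallback result built above; the real keys come
# from PluginNameEnum.list_members(), so they may differ by version.
{
    'plugins': {
        'graph': [],
        'image': [],
        'scalar': [],
        'histogram': []
    }
}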
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Cache item updater."""
import os

from mindinsight.datavisual.data_transform.data_manager import BaseCacheItemUpdater, CachedTrainJob
from mindinsight.lineagemgr.querier.query_model import LineageObj
from mindinsight.lineagemgr.summary.lineage_summary_analyzer import LineageSummaryAnalyzer


class LineageCacheItemUpdater(BaseCacheItemUpdater):
    """Cache item updater for lineage info."""

    def update_item(self, cache_item: CachedTrainJob):
        """Update cache item in place."""
        log_path = cache_item.summary_dir
        log_dir = os.path.dirname(log_path)
        lineage_info = LineageSummaryAnalyzer.get_summary_infos(log_path)
        user_defined_info = LineageSummaryAnalyzer.get_user_defined_info(log_path)
        lineage_obj = LineageObj(
            log_dir,
            train_lineage=lineage_info.train_lineage,
            evaluation_lineage=lineage_info.eval_lineage,
            dataset_graph=lineage_info.dataset_graph,
            user_defined_info=user_defined_info
        )
        cache_item.set(key="lineage", value=lineage_obj)
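LineageCacheItemUpdater is the first consumer of the new BaseCacheItemUpdater/CachedTrainJob interface; any other component could hook into the brief cache the same way. A minimal sketch, not part of this commit, that only reuses the interface shown above (the SummaryDirUpdater name and the "base_dir" key are hypothetical):

import os

from mindinsight.datavisual.data_transform.data_manager import (
    DATA_MANAGER, BaseCacheItemUpdater, CachedTrainJob)


class SummaryDirUpdater(BaseCacheItemUpdater):
    """Hypothetical updater that stores the parent directory of the summary path."""

    def update_item(self, cache_item: CachedTrainJob):
        # Mirrors LineageCacheItemUpdater above: derive a value from
        # cache_item.summary_dir and store it under a cache key.
        cache_item.set(key="base_dir", value=os.path.dirname(cache_item.summary_dir))


# Registered the same way as LineageCacheItemUpdater in the backend init above.
DATA_MANAGER.register_brief_cache_item_updater(SummaryDirUpdater())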
......@@ -63,3 +63,4 @@ class DataVisualErrors(Enum):
    IMAGE_NOT_EXIST = 13
    SCALAR_NOT_EXIST = 14
    HISTOGRAM_NOT_EXIST = 15
    TRAIN_JOB_DETAIL_NOT_IN_CACHE = 16
......@@ -25,7 +25,6 @@ from flask import Response
from mindinsight.conf import settings
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.data_manager import DataManager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE
from mindinsight.datavisual.utils import tools
......@@ -59,7 +58,7 @@ def init_summary_logs():
        log_operations = LogOperations()
        summaries_metadata = log_operations.create_summary_logs(summary_base_dir, constants.SUMMARY_DIR_NUM_FIRST,
                                                                constants.SUMMARY_DIR_PREFIX)
        mock_data_manager = DataManager([DataLoaderGenerator(summary_base_dir)])
        mock_data_manager = DataManager(summary_base_dir)
        mock_data_manager.start_load_data(reload_interval=0)
        check_loading_done(mock_data_manager)
......
......@@ -33,7 +33,6 @@ from mindinsight.datavisual.data_transform import data_manager, ms_data_loader
from mindinsight.datavisual.data_transform.data_loader import DataLoader
from mindinsight.datavisual.data_transform.data_manager import DataManager
from mindinsight.datavisual.data_transform.events_data import EventsData
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.data_transform.loader_generators.loader_generator import MAX_DATA_LOADER_SIZE
from mindinsight.datavisual.data_transform.loader_generators.loader_struct import LoaderStruct
from mindinsight.datavisual.data_transform.ms_data_loader import MSDataLoader
......@@ -89,7 +88,7 @@ class TestDataManager:
            train_ids.append(f'./dir{i}')
        data_manager.logger = MockLogger
        mock_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        mock_manager = data_manager.DataManager(summary_base_dir)
        mock_manager.start_load_data(reload_interval=0)
        check_loading_done(mock_manager)
......@@ -112,7 +111,7 @@ class TestDataManager:
    def test_start_load_data_with_invalid_params(self, params):
        """Test start_load_data with invalid reload_interval or invalid max_threads_count."""
        summary_base_dir = tempfile.mkdtemp()
        d_manager = DataManager([DataLoaderGenerator(summary_base_dir)])
        d_manager = DataManager(summary_base_dir)
        with pytest.raises(ParamValueError):
            d_manager.start_load_data(**params)
        shutil.rmtree(summary_base_dir)
......@@ -142,9 +141,9 @@ class TestDataManager:
                              latest_update_time=modify_time_01,
                              data_loader=loader_01)
        loader_pool = {train_job_01: loader}
        d_manager = DataManager([DataLoaderGenerator(summary_base_dir)])
        d_manager = DataManager(summary_base_dir)
        d_manager._status = DataManagerStatus.LOADING.value
        d_manager._loader_pool = loader_pool
        d_manager._detail_cache._loader_pool = loader_pool
        res = d_manager.list_tensors(train_job_01, tag)
        assert res == {'test result'}
......@@ -169,9 +168,9 @@ class TestDataManager:
                              latest_update_time=modify_time_01,
                              data_loader=loader_01)
        loader_pool = {train_job_01: loader}
        d_manager = DataManager([DataLoaderGenerator(summary_base_dir)])
        d_manager = DataManager(summary_base_dir)
        d_manager._status = DataManagerStatus.LOADING.value
        d_manager._loader_pool = loader_pool
        d_manager._detail_cache._loader_pool = loader_pool
        tag = 'image'
        with pytest.raises(ParamValueError):
            d_manager.list_tensors(train_job_01, tag)
......@@ -181,7 +180,7 @@ class TestDataManager:
    def test_list_tensors_with_not_exist_train_job(self):
        """Test list_tensors method with parameter train_id not found in loader_pool."""
        summary_base_dir = tempfile.mkdtemp()
        d_manager = DataManager([DataLoaderGenerator(summary_base_dir)])
        d_manager = DataManager(summary_base_dir)
        d_manager._status = DataManagerStatus.LOADING.value
        tag = 'image'
        train_job_01 = 'train_01'
......@@ -200,13 +199,12 @@ class TestDataManager:
        expected_loader_ids = list(loader_dict.keys())
        mock_generate_loaders.return_value = loader_dict
        generators = [data_manager.DataLoaderGenerator(summary_base_dir)]
        mock_data_manager = data_manager.DataManager(generators)
        mock_data_manager._execute_load_data = Mock()
        mock_data_manager = data_manager.DataManager(summary_base_dir)
        mock_data_manager._detail_cache._execute_load_data = Mock()
        mock_data_manager.start_load_data(reload_interval=0)
        check_loading_done(mock_data_manager, 3)
        current_loader_ids = mock_data_manager._loader_pool.keys()
        current_loader_ids = mock_data_manager._detail_cache._loader_pool.keys()
        assert sorted(current_loader_ids) == sorted(expected_loader_ids)
......@@ -221,7 +219,7 @@ class TestDataManager:
        mock_generate_loaders.return_value = loader_dict
        mock_data_manager.start_load_data(reload_interval=0)
        check_loading_done(mock_data_manager)
        current_loader_ids = mock_data_manager._loader_pool.keys()
        current_loader_ids = mock_data_manager._detail_cache._loader_pool.keys()
        assert sorted(current_loader_ids) == sorted(expected_loader_ids)
......
......@@ -30,7 +30,6 @@ from mindinsight.datavisual.common.exceptions import GraphNotExistError
from mindinsight.datavisual.common.exceptions import NodeNotInGraphError
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.data_manager import DataManager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.processors.graph_processor import GraphProcessor
from mindinsight.datavisual.utils import crc32
from mindinsight.utils.exceptions import ParamValueError
......@@ -74,7 +73,7 @@ class TestGraphProcessor:
        self._temp_path, self._graph_dict, _ = log_operation.generate_log(PluginNameEnum.GRAPH.value, log_dir)
        self._generated_path.append(summary_base_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        self._mock_data_manager = data_manager.DataManager(summary_base_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        # wait for loading done
......@@ -93,7 +92,7 @@ class TestGraphProcessor:
        self._generated_path.append(summary_base_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        self._mock_data_manager = data_manager.DataManager(summary_base_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        # wait for loading done
......
......@@ -27,7 +27,6 @@ from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.common.exceptions import HistogramNotExistError
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.processors.histogram_processor import HistogramProcessor
from mindinsight.datavisual.utils import crc32
......@@ -72,7 +71,7 @@ class TestHistogramProcessor:
            PluginNameEnum.HISTOGRAM.value, log_dir, dict(step=self._steps_list, tag=self._tag_name))
        self._generated_path.append(summary_base_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        self._mock_data_manager = data_manager.DataManager(summary_base_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        # wait for loading done
......
......@@ -27,7 +27,6 @@ from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.common.exceptions import ImageNotExistError
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.processors.images_processor import ImageProcessor
from mindinsight.datavisual.utils import crc32
......@@ -81,7 +80,7 @@ class TestImagesProcessor:
            PluginNameEnum.IMAGE.value, log_dir, dict(steps=steps_list, tag=self._tag_name))
        self._generated_path.append(summary_base_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        self._mock_data_manager = data_manager.DataManager(summary_base_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        # wait for loading done
......
......@@ -27,7 +27,6 @@ from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.common.exceptions import ScalarNotExistError
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.processors.scalars_processor import ScalarsProcessor
from mindinsight.datavisual.utils import crc32
......@@ -73,7 +72,7 @@ class TestScalarsProcessor:
            PluginNameEnum.SCALAR.value, log_dir, dict(step=self._steps_list, tag=self._tag_name))
        self._generated_path.append(summary_base_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(summary_base_dir)])
        self._mock_data_manager = data_manager.DataManager(summary_base_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        # wait for loading done
......
......@@ -27,7 +27,6 @@ import pytest
from mindinsight.datavisual.common.enums import PluginNameEnum
from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
from mindinsight.datavisual.data_transform import data_manager
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.datavisual.processors.train_task_manager import TrainTaskManager
from mindinsight.datavisual.utils import crc32
......@@ -97,7 +96,7 @@ class TestTrainTaskManager:
        self._generated_path.append(self._root_dir)
        self._mock_data_manager = data_manager.DataManager([DataLoaderGenerator(self._root_dir)])
        self._mock_data_manager = data_manager.DataManager(self._root_dir)
        self._mock_data_manager.start_load_data(reload_interval=0)
        check_loading_done(self._mock_data_manager, time_limit=30)
......