提交 4913543f 编写于 作者: L liangyongxiong

Add robustness checks to the summary watcher so that a FileNotFoundError no longer crashes data loading

上级 c8f47e6e
...@@ -42,6 +42,7 @@ from mindinsight.datavisual.data_transform.loader_generators.loader_generator im ...@@ -42,6 +42,7 @@ from mindinsight.datavisual.data_transform.loader_generators.loader_generator im
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.utils.exceptions import MindInsightException from mindinsight.utils.exceptions import MindInsightException
from mindinsight.utils.exceptions import ParamValueError from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.exceptions import UnknownError
class _BasicTrainJob: class _BasicTrainJob:
...@@ -861,7 +862,7 @@ class DataManager: ...@@ -861,7 +862,7 @@ class DataManager:
# Let gunicorn load other modules first. # Let gunicorn load other modules first.
time.sleep(1) time.sleep(1)
while True: while True:
self._load_data_in_thread() self._load_data_in_thread_wrapper()
if not self._reload_interval: if not self._reload_interval:
break break
...@@ -874,18 +875,26 @@ class DataManager: ...@@ -874,18 +875,26 @@ class DataManager:
This function needs to be used after `start_load_data` function. This function needs to be used after `start_load_data` function.
""" """
logger.debug("start to reload data") logger.debug("start to reload data")
thread = threading.Thread(target=self._load_data_in_thread, thread = threading.Thread(target=self._load_data_in_thread_wrapper,
name='reload_data_thread') name='reload_data_thread')
thread.daemon = False thread.daemon = False
thread.start() thread.start()
def _load_data_in_thread_wrapper(self):
    """
    Run `_load_data_in_thread` and absorb any `MindInsightException` it raises.

    The exception is reported through a warning log carrying its message and
    is not propagated to the caller. Exceptions of other types are not handled
    here.
    """
    try:
        self._load_data_in_thread()
    except MindInsightException as err:
        # Deliberately not re-raised, so that data reloading does not crash.
        logger.warning(err.message)
def _load_data_in_thread(self): def _load_data_in_thread(self):
"""Log (but not swallow) exceptions in thread to help debugging.""" """Log (but not swallow) exceptions in thread to help debugging."""
try: try:
self._load_data() self._load_data()
except Exception as exc: except Exception as exc:
logger.exception(exc) logger.exception(exc)
raise raise UnknownError('Load data thread error.')
def _load_data(self): def _load_data(self):
"""This function will load data once and ignore it if the status is loading.""" """This function will load data once and ignore it if the status is loading."""
......
...@@ -209,8 +209,14 @@ class SummaryWatcher: ...@@ -209,8 +209,14 @@ class SummaryWatcher:
starting with "./" . starting with "./" .
entry (DirEntry): Directory entry instance needed to check with regular expression. entry (DirEntry): Directory entry instance needed to check with regular expression.
""" """
ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone() try:
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone() stat = entry.stat()
except FileNotFoundError:
logger.warning('File %s not found', entry.name)
return
ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
if entry.is_file(): if entry.is_file():
summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name) summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
...@@ -304,7 +310,13 @@ class SummaryWatcher: ...@@ -304,7 +310,13 @@ class SummaryWatcher:
return False return False
def _is_empty_directory(self, directory): def _is_empty_directory(self, directory):
return not bool(os.listdir(directory)) try:
count = len(os.listdir(directory))
except FileNotFoundError:
logger.warning('Directory %s not found.', directory)
count = 0
return not bool(count)
def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10): def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
""" """
...@@ -388,8 +400,13 @@ class SummaryWatcher: ...@@ -388,8 +400,13 @@ class SummaryWatcher:
except OverflowError: except OverflowError:
continue continue
# extract modified time from filesystem try:
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone() stat = entry.stat()
except FileNotFoundError:
logger.warning('File %s not found.', entry.name)
continue
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
summaries.append({ summaries.append({
'file_name': entry.name, 'file_name': entry.name,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册