提交 8585e015 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!281 add robust check for summary watcher in case of FileNotFound exception

Merge pull request !281 from liangyongxiong/fix-summary-watcher
......@@ -42,6 +42,7 @@ from mindinsight.datavisual.data_transform.loader_generators.loader_generator im
from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
from mindinsight.utils.exceptions import MindInsightException
from mindinsight.utils.exceptions import ParamValueError
from mindinsight.utils.exceptions import UnknownError
class _BasicTrainJob:
......@@ -861,7 +862,7 @@ class DataManager:
# Let gunicorn load other modules first.
time.sleep(1)
while True:
self._load_data_in_thread()
self._load_data_in_thread_wrapper()
if not self._reload_interval:
break
......@@ -874,18 +875,26 @@ class DataManager:
This function needs to be used after `start_load_data` function.
"""
logger.debug("start to reload data")
thread = threading.Thread(target=self._load_data_in_thread,
thread = threading.Thread(target=self._load_data_in_thread_wrapper,
name='reload_data_thread')
thread.daemon = False
thread.start()
def _load_data_in_thread_wrapper(self):
    """
    Run `_load_data_in_thread` without letting MindInsight errors escape.

    A `MindInsightException` raised by the load is reported at warning level
    through its `message` attribute and then dropped. Any other exception
    type is not handled here.
    """
    try:
        self._load_data_in_thread()
    except MindInsightException as err:
        # Deliberately not re-raised, so that data reloading does not crash.
        logger.warning(err.message)
def _load_data_in_thread(self):
    """
    Load data once and report any failure as an `UnknownError`.

    Every exception raised by `_load_data` is logged together with its
    traceback and then replaced by `UnknownError`, with the original
    exception chained as the cause.

    Raises:
        UnknownError: If `_load_data` raises any exception.
    """
    try:
        self._load_data()
    except Exception as exc:
        # Record the full traceback before the exception type is replaced.
        logger.exception(exc)
        # NOTE(review): UnknownError is presumably a MindInsightException
        # subclass, which is what `_load_data_in_thread_wrapper` catches;
        # confirm against mindinsight.utils.exceptions.
        raise UnknownError('Load data thread error.') from exc
def _load_data(self):
"""This function will load data once and ignore it if the status is loading."""
......
......@@ -209,8 +209,14 @@ class SummaryWatcher:
starting with "./" .
entry (DirEntry): Directory entry instance needed to check with regular expression.
"""
ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone()
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
try:
stat = entry.stat()
except FileNotFoundError:
logger.warning('File %s not found', entry.name)
return
ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
if entry.is_file():
summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
......@@ -304,7 +310,13 @@ class SummaryWatcher:
return False
def _is_empty_directory(self, directory):
return not bool(os.listdir(directory))
try:
count = len(os.listdir(directory))
except FileNotFoundError:
logger.warning('Directory %s not found.', directory)
count = 0
return not bool(count)
def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
"""
......@@ -388,8 +400,13 @@ class SummaryWatcher:
except OverflowError:
continue
# extract modified time from filesystem
mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
try:
stat = entry.stat()
except FileNotFoundError:
logger.warning('File %s not found.', entry.name)
continue
mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
summaries.append({
'file_name': entry.name,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册