Keep background data reloading alive when loading fails or files vanish.

data_manager.py: the load loop and the reload thread now call
_load_data_in_thread_wrapper, which catches MindInsightException and logs it
as a warning. _load_data_in_thread still logs the full traceback, then raises
UnknownError explicitly chained to the original exception.

summary_watcher.py: entry.stat() and os.listdir() are guarded against
FileNotFoundError; the missing entry is logged and skipped, and a missing
directory is reported as empty.

NOTE(review): line breaks and indentation in this patch were reconstructed
from Python syntax and the hunk line counts. Confirm context-line whitespace
against the target files, or apply with whitespace-insensitive matching.

diff --git a/mindinsight/datavisual/data_transform/data_manager.py b/mindinsight/datavisual/data_transform/data_manager.py
index 1ec86bfe7cf6d1894b86a028cea39f58e19d553a..c63f8218141928d129fe9f66cde1084dd9981038 100644
--- a/mindinsight/datavisual/data_transform/data_manager.py
+++ b/mindinsight/datavisual/data_transform/data_manager.py
@@ -42,6 +42,7 @@ from mindinsight.datavisual.data_transform.loader_generators.loader_generator im
 from mindinsight.datavisual.data_transform.loader_generators.data_loader_generator import DataLoaderGenerator
 from mindinsight.utils.exceptions import MindInsightException
 from mindinsight.utils.exceptions import ParamValueError
+from mindinsight.utils.exceptions import UnknownError
 
 
 class _BasicTrainJob:
@@ -861,7 +862,7 @@ class DataManager:
         # Let gunicorn load other modules first.
         time.sleep(1)
         while True:
-            self._load_data_in_thread()
+            self._load_data_in_thread_wrapper()
 
             if not self._reload_interval:
                 break
@@ -874,18 +875,26 @@ class DataManager:
         This function needs to be used after `start_load_data` function.
         """
         logger.debug("start to reload data")
-        thread = threading.Thread(target=self._load_data_in_thread,
+        thread = threading.Thread(target=self._load_data_in_thread_wrapper,
                                   name='reload_data_thread')
         thread.daemon = False
         thread.start()
 
+    def _load_data_in_thread_wrapper(self):
+        """Wrapper for load data in thread."""
+        try:
+            self._load_data_in_thread()
+        except MindInsightException as exc:
+            # Not raising the exception here to ensure that data reloading does not crash.
+            logger.warning(exc.message)
+
     def _load_data_in_thread(self):
         """Log (but not swallow) exceptions in thread to help debugging."""
         try:
             self._load_data()
         except Exception as exc:
             logger.exception(exc)
-            raise
+            raise UnknownError('Load data thread error.') from exc
 
     def _load_data(self):
         """This function will load data once and ignore it if the status is loading."""
diff --git a/mindinsight/datavisual/data_transform/summary_watcher.py b/mindinsight/datavisual/data_transform/summary_watcher.py
index e4b34a00143439f7fc2d5ee342aeaaaae275887d..e87d5374c8424294a701ccd3a5d6df56b3c9dd47 100644
--- a/mindinsight/datavisual/data_transform/summary_watcher.py
+++ b/mindinsight/datavisual/data_transform/summary_watcher.py
@@ -209,8 +209,14 @@ class SummaryWatcher:
                 starting with "./" .
             entry (DirEntry): Directory entry instance needed to check with regular expression.
         """
-        ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone()
-        mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
+        try:
+            stat = entry.stat()
+        except FileNotFoundError:
+            logger.warning('File %s not found', entry.name)
+            return
+
+        ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
+        mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
 
         if entry.is_file():
             summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
@@ -304,7 +310,13 @@ class SummaryWatcher:
         return False
 
     def _is_empty_directory(self, directory):
-        return not bool(os.listdir(directory))
+        try:
+            count = len(os.listdir(directory))
+        except FileNotFoundError:
+            logger.warning('Directory %s not found.', directory)
+            count = 0
+
+        return not bool(count)
 
     def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
         """
@@ -388,8 +400,13 @@ class SummaryWatcher:
             except OverflowError:
                 continue
 
-            # extract modified time from filesystem
-            mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
+            try:
+                stat = entry.stat()
+            except FileNotFoundError:
+                logger.warning('File %s not found.', entry.name)
+                continue
+
+            mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
 
             summaries.append({
                 'file_name': entry.name,