diff --git a/visualdl/component/profiler/parser/event_node.py b/visualdl/component/profiler/parser/event_node.py index b7df5830779b21ab1fc0a103bf2f4ee03b71a547..b3d0eebf78af05b246b3151b85718a17898a0253 100644 --- a/visualdl/component/profiler/parser/event_node.py +++ b/visualdl/component/profiler/parser/event_node.py @@ -265,10 +265,16 @@ class ProfilerResult: def parse_json(self, json_data): self.schema_version = json_data['schemaVersion'] self.span_idx = json_data['span_indx'] - self.device_infos = { - device_info['id']: device_info - for device_info in json_data['deviceProperties'] - } + try: + self.device_infos = { + device_info['id']: device_info + for device_info in json_data['deviceProperties'] + } + except Exception: + print( + "paddlepaddle-gpu version is needed to get GPU device informations." + ) + self.device_infos = {} hostnodes = [] runtimenodes = [] devicenodes = [] diff --git a/visualdl/component/profiler/profiler_data.py b/visualdl/component/profiler/profiler_data.py index 100f8a2459cf32e9a760e24c11c9eab86359a87d..e6ce7f75b95c9e177f5ccb61bba05f3ba3e12637 100644 --- a/visualdl/component/profiler/profiler_data.py +++ b/visualdl/component/profiler/profiler_data.py @@ -1767,6 +1767,8 @@ class DistributedProfilerData: data = [] for profile_data in self.profile_datas: device_infos = profile_data.device_infos + if not device_infos: + return data gpu_id = int(next(iter(profile_data.gpu_ids))) data.append({ 'worker_name': diff --git a/visualdl/component/profiler/profiler_reader.py b/visualdl/component/profiler/profiler_reader.py index 79f19543df62410ca4ff9c6e6089a27f0c9db7bb..8b4d77831f810119b8508e94388b37e3eb4a5e2b 100644 --- a/visualdl/component/profiler/profiler_reader.py +++ b/visualdl/component/profiler/profiler_reader.py @@ -14,6 +14,7 @@ # ======================================================================= import os import re +from threading import Lock from threading import Thread import packaging.version @@ -28,6 +29,7 @@ from .run_manager import RunManager from visualdl.io import bfile _name_pattern = re.compile(r"(.+)_time_(.+)\.paddle_trace\.((pb)|(json))") +_lock = Lock() def is_VDLProfiler_file(path): @@ -118,8 +120,10 @@ class ProfilerReader(object): self.run_managers[run] = RunManager(run) self.run_managers[run].set_all_filenames(filenames) for filename in filenames: - if self.run_managers[run].has_handled(filename): - continue + with _lock: # we add this to prevent parallel requests for handling a file multiple times + if self.run_managers[run].has_handled(filename): + continue + self.run_managers[run].handled_filenames.add(filename) self._read_data(run, filename) return list(self.walks.keys()) diff --git a/visualdl/component/profiler/profiler_server.py b/visualdl/component/profiler/profiler_server.py index bb497706325b80e8220cd0b81231f9ce2a98c408..8708a00aad918f66b151051079d3f42dbb546ce2 100644 --- a/visualdl/component/profiler/profiler_server.py +++ b/visualdl/component/profiler/profiler_server.py @@ -194,6 +194,8 @@ class ProfilerApi(object): run_manager = self._reader.get_run_manager(run) distributed_profiler_data = run_manager.get_distributed_profiler_data( span) + if distributed_profiler_data is None: + return return distributed_profiler_data.get_distributed_steps() @result() @@ -201,6 +203,8 @@ class ProfilerApi(object): run_manager = self._reader.get_run_manager(run) distributed_profiler_data = run_manager.get_distributed_profiler_data( span) + if distributed_profiler_data is None: + return return distributed_profiler_data.get_distributed_histogram( step, time_unit) diff --git a/visualdl/component/profiler/run_manager.py b/visualdl/component/profiler/run_manager.py index 037ca02e5d77ae40088c8b8f3b76da64fac403fb..418626abf0a212a68a7bc13e8019d9de9728037c 100644 --- a/visualdl/component/profiler/run_manager.py +++ b/visualdl/component/profiler/run_manager.py @@ -104,11 +104,8 @@ class RunManager: return def join(self): - if self.has_join: - return for thread in self.threads.values(): thread.join() - self.has_join = True distributed_profiler_data = defaultdict(list) for worker_name, span_data in self.profiler_data.items(): for span_idx, profiler_data in span_data.items():