未验证 提交 3bc3e2e6 编写于 作者: C chenjian 提交者: GitHub

Add protobuf file support in profiling

上级 8d3e2818
...@@ -7,4 +7,5 @@ protobuf >= 3.11.0 ...@@ -7,4 +7,5 @@ protobuf >= 3.11.0
requests requests
six >= 1.14.0 six >= 1.14.0
matplotlib matplotlib
pandas pandas
\ No newline at end of file packaging
\ No newline at end of file
...@@ -17,6 +17,9 @@ import functools ...@@ -17,6 +17,9 @@ import functools
import json import json
import re import re
import sys import sys
import tempfile
from .utils import traverse_tree
_show_name_pattern = re.compile(r'(.+)(\[.+\])') _show_name_pattern = re.compile(r'(.+)(\[.+\])')
_show_tid_pattern = re.compile(r'\w+(\(.+\))') _show_tid_pattern = re.compile(r'\w+(\(.+\))')
...@@ -78,6 +81,25 @@ class HostNode: ...@@ -78,6 +81,25 @@ class HostNode:
self.mem_node = [] self.mem_node = []
return self return self
@classmethod
def from_protobuf(cls, obj):
    """Build a HostNode from a protobuf host-event node `obj`.

    Scalar fields are copied one-to-one; the child/runtime/device/mem
    lists start empty and are filled in by the caller while it walks
    the protobuf tree.
    """
    node = cls()
    # Enum repr such as "TracerEventType.Operator" -> keep "Operator".
    node.type = str(obj.type).split('.')[1]
    for attr in ('name', 'start_ns', 'end_ns', 'process_id', 'thread_id',
                 'correlation_id', 'input_shapes', 'dtypes', 'callstack'):
        setattr(node, attr, getattr(obj, attr))
    node.children_node = []
    node.runtime_node = []
    node.device_node = []
    node.mem_node = []
    return node
class MemNode: class MemNode:
def __init__(self): def __init__(self):
...@@ -118,6 +140,22 @@ class MemNode: ...@@ -118,6 +140,22 @@ class MemNode:
'peak_reserved'] if 'peak_reserved' in json_obj['args'] else 0 'peak_reserved'] if 'peak_reserved' in json_obj['args'] else 0
return self return self
@classmethod
def from_protobuf(cls, obj):
    """Build a MemNode from a protobuf memory-event node `obj`."""
    node = cls()
    # Enum repr such as "TracerMemEventType.Allocate" -> keep "Allocate".
    node.type = str(obj.type).split('.')[1]
    # Normalize the address to a hex string for display.
    node.addr = hex(int(obj.addr))
    for attr in ('timestamp_ns', 'process_id', 'thread_id',
                 'increase_bytes', 'place', 'current_allocated',
                 'current_reserved', 'peak_allocated', 'peak_reserved'):
        setattr(node, attr, getattr(obj, attr))
    return node
class DeviceNode: class DeviceNode:
def __init__(self): def __init__(self):
...@@ -176,9 +214,35 @@ class DeviceNode: ...@@ -176,9 +214,35 @@ class DeviceNode:
"warps per SM"] if "warps per SM" in json_obj['args'] else 0 "warps per SM"] if "warps per SM" in json_obj['args'] else 0
return self return self
@classmethod
def from_protobuf(cls, obj):
    """Build a DeviceNode from a protobuf device-event node `obj`."""
    node = cls()
    # Enum repr such as "TracerEventType.Kernel" -> keep "Kernel".
    node.type = str(obj.type).split('.')[1]
    # Protobuf stores occupancy as a fraction; the UI expects a percentage.
    node.occupancy = obj.occupancy * 100
    for attr in ('name', 'start_ns', 'end_ns', 'device_id', 'stream_id',
                 'context_id', 'correlation_id',
                 'block_x', 'block_y', 'block_z',
                 'grid_x', 'grid_y', 'grid_z',
                 'shared_memory', 'registers_per_thread', 'num_bytes',
                 'value', 'blocks_per_sm', 'warps_per_sm'):
        setattr(node, attr, getattr(obj, attr))
    return node
class ProfilerResult: class ProfilerResult:
def __init__(self, json_data): def __init__(self, data):
self.device_infos = None self.device_infos = None
self.span_idx = None self.span_idx = None
self.data = None self.data = None
...@@ -187,11 +251,18 @@ class ProfilerResult: ...@@ -187,11 +251,18 @@ class ProfilerResult:
self.has_hostnodes = True self.has_hostnodes = True
self.has_devicenodes = True self.has_devicenodes = True
self.has_memnodes = True self.has_memnodes = True
self.parse(json_data)
self.content = json_data
self.start_in_timeline_ns = None self.start_in_timeline_ns = None
if isinstance(data, dict):
def parse(self, json_data): self.parse_json(data)
self.content = data
else:
self.parse_protobuf(data)
with tempfile.NamedTemporaryFile("r") as fp:
data.save(fp.name, "json")
fp.seek(0)
self.content = json.loads(fp.read())
def parse_json(self, json_data):
self.schema_version = json_data['schemaVersion'] self.schema_version = json_data['schemaVersion']
self.span_idx = json_data['span_indx'] self.span_idx = json_data['span_indx']
self.device_infos = { self.device_infos = {
...@@ -232,6 +303,82 @@ class ProfilerResult: ...@@ -232,6 +303,82 @@ class ProfilerResult:
memnodes) memnodes)
self.extra_info = json_data['ExtraInfo'] self.extra_info = json_data['ExtraInfo']
def parse_protobuf(self, protobuf_data):  # noqa: C901
    """Populate this ProfilerResult from a paddle protobuf profiler result.

    Mirrors parse_json: copies version/span/device/extra-info metadata,
    deep-copies the per-thread host-event trees into HostNode/DeviceNode/
    MemNode objects, then shifts every timestamp so the earliest non-root
    host event starts at 0.
    """
    self.schema_version = protobuf_data.get_version()
    self.span_idx = str(protobuf_data.get_span_indx())
    # get_device_property() needs a GPU build of paddle; fall back to an
    # empty mapping on any failure rather than aborting the whole parse.
    try:
        self.device_infos = {
            device_id: {
                'name': device_property.name,
                'totalGlobalMem': device_property.total_memory,
                'computeMajor': device_property.major,
                'computeMinor': device_property.minor
            }
            for device_id, device_property in
            protobuf_data.get_device_property().items()
        }
    except Exception:
        print(
            "paddlepaddle-gpu version is needed to get GPU device informations."
        )
        self.device_infos = {}
    self.extra_info = protobuf_data.get_extra_info()
    # Earliest event timestamp seen so far; shrinks as nodes are scanned.
    self.start_in_timeline_ns = float('inf')
    # Presence flags flip to True the first time each node kind is seen.
    self.has_hostnodes = False
    self.has_devicenodes = False
    self.has_memnodes = False
    node_trees = protobuf_data.get_data()
    new_node_trees = {}
    # Iterative DFS with paired stacks: `stack` walks the protobuf tree,
    # `new_stack` tracks the matching freshly-built HostNode.
    for threadid, root in node_trees.items():
        stack = []
        new_stack = []
        new_root = HostNode.from_protobuf(root)
        new_node_trees[threadid] = new_root
        stack.append(root)
        new_stack.append(new_root)
        while stack:
            current_node = stack.pop()
            new_current_node = new_stack.pop()
            for child_node in current_node.children_node:
                # Root alone does not count as a host node; any child does.
                if self.has_hostnodes is False:
                    self.has_hostnodes = True
                new_child_node = HostNode.from_protobuf(child_node)
                new_current_node.children_node.append(new_child_node)
                stack.append(child_node)
                new_stack.append(new_child_node)
            for runtime_node in current_node.runtime_node:
                new_runtime_node = HostNode.from_protobuf(runtime_node)
                new_current_node.runtime_node.append(new_runtime_node)
                for device_node in runtime_node.device_node:
                    new_device_node = DeviceNode.from_protobuf(device_node)
                    new_runtime_node.device_node.append(new_device_node)
            for mem_node in current_node.mem_node:
                new_mem_node = MemNode.from_protobuf(mem_node)
                new_current_node.mem_node.append(new_mem_node)
    # Flatten each tree to a list so both passes below can skip the
    # synthetic root (index 0) uniformly.
    new_node_tree_list = traverse_tree(new_node_trees)
    # Pass 1: find the minimum start time over all non-root host nodes.
    # NOTE(review): only host-node start_ns is considered here, not
    # device/mem timestamps — presumably host events always start first;
    # confirm against the exporter.
    for threadid, node_tree_list in new_node_tree_list.items():
        for node in node_tree_list[1:]:  # skip root
            if node.start_ns < self.start_in_timeline_ns:
                self.start_in_timeline_ns = node.start_ns
    # Pass 2: rebase every timestamp so the timeline starts at 0.
    for threadid, node_tree_list in new_node_tree_list.items():
        for node in node_tree_list:
            if node != node_tree_list[0]:  # skip root
                node.start_ns -= self.start_in_timeline_ns
                node.end_ns -= self.start_in_timeline_ns
                for runtimenode in node.runtime_node:
                    runtimenode.end_ns -= self.start_in_timeline_ns
                    runtimenode.start_ns -= self.start_in_timeline_ns
                    for device_node in runtimenode.device_node:
                        if self.has_devicenodes is False:
                            self.has_devicenodes = True
                        device_node.start_ns -= self.start_in_timeline_ns
                        device_node.end_ns -= self.start_in_timeline_ns
                for mem_node in node.mem_node:
                    if self.has_memnodes is False:
                        self.has_memnodes = True
                    mem_node.timestamp_ns -= self.start_in_timeline_ns
    self.data = new_node_trees
def build_tree( # noqa: C901 def build_tree( # noqa: C901
self, hostnodes, runtimenodes, devicenodes, memnodes): self, hostnodes, runtimenodes, devicenodes, memnodes):
thread2host_event_nodes = collections.defaultdict(list) thread2host_event_nodes = collections.defaultdict(list)
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import collections import collections
import os
import sys
StageType = ['Dataloader', 'Forward', 'Backward', 'Optimization'] StageType = ['Dataloader', 'Forward', 'Backward', 'Optimization']
...@@ -466,7 +468,6 @@ def format_time(time, unit='ms', inf_subs='-'): ...@@ -466,7 +468,6 @@ def format_time(time, unit='ms', inf_subs='-'):
result /= 1e6 result /= 1e6
elif unit == 'us': elif unit == 'us':
result /= 1e3 result /= 1e3
# return '{:.2f}'.format(result)
return round(result, 2) return round(result, 2)
...@@ -474,7 +475,6 @@ def format_ratio(ratio): ...@@ -474,7 +475,6 @@ def format_ratio(ratio):
r""" r"""
Transform ratio within [0, 1] to percentage presentation. Transform ratio within [0, 1] to percentage presentation.
""" """
# return '{:.2f}'.format(ratio * 100)
return round(ratio * 100, 2) return round(ratio * 100, 2)
...@@ -490,5 +490,27 @@ def format_memory(memory, memory_unit='KB'): ...@@ -490,5 +490,27 @@ def format_memory(memory, memory_unit='KB'):
result /= (1024 * 1024) result /= (1024 * 1024)
elif memory_unit == 'KB': elif memory_unit == 'KB':
result /= 1024 result /= 1024
# return '{:.2f}'.format(result)
return round(result, 2) return round(result, 2)
class RedirectStdStreams(object):
    """Context manager that redirects the process-level stdout/stderr
    file descriptors to the given streams for the duration of the block.

    Works at the OS fd level (dup/dup2), so output from C extensions and
    subprocesses sharing fds 1/2 is redirected too, not just Python-level
    writes through ``sys.stdout``/``sys.stderr``.
    """

    def __init__(self, stdout=None, stderr=None):
        # Fall back to the current streams when no replacement is given.
        self._stdout = stdout or sys.stdout
        self._stderr = stderr or sys.stderr

    def __enter__(self):
        '''
        Replace stdout and stderr to specified stream.
        '''
        # Flush Python-level buffers so pending output goes to the
        # ORIGINAL destination before the fds are swapped.
        sys.stdout.flush()
        sys.stderr.flush()
        # Keep duplicates of the original fds so they can be restored.
        self.old_stdout_fileno, self.old_stderr_fileno = os.dup(
            sys.stdout.fileno()), os.dup(sys.stderr.fileno())
        os.dup2(self._stdout.fileno(), sys.stdout.fileno())
        os.dup2(self._stderr.fileno(), sys.stderr.fileno())
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        '''
        Restore the original stdout and stderr file descriptors.
        '''
        # Bug fix: flush before restoring, otherwise output buffered
        # while redirected would be written to the RESTORED streams
        # after the swap (or lost), instead of the redirect targets.
        sys.stdout.flush()
        sys.stderr.flush()
        os.dup2(self.old_stdout_fileno, sys.stdout.fileno())
        os.dup2(self.old_stderr_fileno, sys.stderr.fileno())
        # Close the saved duplicates to avoid leaking fds per use.
        os.close(self.old_stdout_fileno)
        os.close(self.old_stderr_fileno)
...@@ -147,39 +147,73 @@ class ProfilerData: ...@@ -147,39 +147,73 @@ class ProfilerData:
else: else:
device_type = 'GPU' device_type = 'GPU'
gpu_id = int(next(iter(self.gpu_ids))) gpu_id = int(next(iter(self.gpu_ids)))
return { if gpu_id in self.device_infos:
"device_type": device_type, return {
"CPU": { "device_type": device_type,
"process_utilization": "CPU": {
format_ratio( "process_utilization":
float(self.extra_infos["Process Cpu Utilization"])), format_ratio(
"system_utilization": float(
format_ratio( self.extra_infos["Process Cpu Utilization"])),
float(self.extra_infos["System Cpu Utilization"])) "system_utilization":
}, format_ratio(
"GPU": { float(self.extra_infos["System Cpu Utilization"]))
"name": },
self.device_infos[gpu_id]['name'], "GPU": {
"memory": "name":
"{} GB".format( self.device_infos[gpu_id]['name'],
format_memory( "memory":
self.device_infos[gpu_id]['totalGlobalMem'], "{} GB".format(
'GB')), format_memory(
"compute_capability": self.device_infos[gpu_id]['totalGlobalMem'],
'{}.{}'.format(self.device_infos[gpu_id]['computeMajor'], 'GB')),
self.device_infos[gpu_id]['computeMinor']), "compute_capability":
"utilization": '{}.{}'.format(
format_ratio(self.gpu_ulitization), self.device_infos[gpu_id]['computeMajor'],
"sm_efficiency": self.device_infos[gpu_id]['computeMinor']),
format_ratio( "utilization":
self.sm_efficiency / format_ratio(self.gpu_ulitization),
self.model_perspective_items['ProfileStep'].cpu_time), "sm_efficiency":
"achieved_occupancy": format_ratio(
format_ratio(self.occupancy), self.sm_efficiency / self.
"tensor_core_percentage": model_perspective_items['ProfileStep'].cpu_time),
format_ratio(self.tensorcore_ratio) "achieved_occupancy":
format_ratio(self.occupancy),
"tensor_core_percentage":
format_ratio(self.tensorcore_ratio)
}
}
else:
return {
"device_type": device_type,
"CPU": {
"process_utilization":
format_ratio(
float(
self.extra_infos["Process Cpu Utilization"])),
"system_utilization":
format_ratio(
float(self.extra_infos["System Cpu Utilization"]))
},
"GPU": {
"name":
"-",
"memory":
"-",
"compute_capability":
'-',
"utilization":
format_ratio(self.gpu_ulitization),
"sm_efficiency":
format_ratio(
self.sm_efficiency / self.
model_perspective_items['ProfileStep'].cpu_time),
"achieved_occupancy":
format_ratio(self.occupancy),
"tensor_core_percentage":
format_ratio(self.tensorcore_ratio)
}
} }
}
def get_model_perspective(self, time_unit): def get_model_perspective(self, time_unit):
''' '''
......
...@@ -16,11 +16,14 @@ import os ...@@ -16,11 +16,14 @@ import os
import re import re
from threading import Thread from threading import Thread
import packaging.version
from multiprocess import Process from multiprocess import Process
from multiprocess import Queue from multiprocess import Queue
from .parser.const_description import * # noqa: F403 from .parser.const_description import * # noqa: F403
from .parser.event_node import load_profiler_json from .parser.event_node import load_profiler_json
from .parser.event_node import ProfilerResult
from .parser.utils import RedirectStdStreams
from .run_manager import RunManager from .run_manager import RunManager
from visualdl.io import bfile from visualdl.io import bfile
...@@ -175,17 +178,44 @@ class ProfilerReader(object): ...@@ -175,17 +178,44 @@ class ProfilerReader(object):
if match: if match:
worker_name = match.group(1) worker_name = match.group(1)
if '.pb' in filename: if '.pb' in filename:
try:
from paddle.profiler import load_profiler_result def _load_profiler_protobuf(run, filename, worker_name):
except Exception: devnull = open(os.devnull, 'w+')
print( with RedirectStdStreams(stdout=devnull, stderr=devnull):
'Load paddle.profiler error. Please check paddle >= 2.3.0' try:
) import paddle
exit(0) except Exception as e:
profile_result = load_profiler_result( print('Paddle is required to read protobuf file.\
os.path.join(run, filename)) Please install [paddlepaddle](https://www.paddlepaddle.org.cn/install/quick?\
self.run_managers[run].add_profile_result( docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html) first.'
filename, worker_name, profile_result) )
raise RuntimeError(str(e))
if packaging.version.parse(
'2.4.0') > packaging.version.parse(
paddle.__version__):
raise RuntimeError(
"Please make sure paddlepaddle version >= 2.4.0"
)
from paddle.profiler import load_profiler_result
try:
content = load_profiler_result(
os.path.join(run, filename))
if content is None:
raise RuntimeError("Missing required fields.")
profile_result = ProfilerResult(content)
except Exception as e:
raise RuntimeError(
"An error occurred while loading the protobuf file, which may be caused\
by the outdated version of paddle that generated the profile file. \
Please make sure protobuf file is exported by paddlepaddle version >= 2.4.0. \
Error message: {}".format(e))
self.profile_result_queue.put(
(run, filename, worker_name, profile_result))
Process(
target=_load_profiler_protobuf,
args=(run, filename, worker_name)).start()
else: else:
def _load_profiler_json(run, filename, worker_name): def _load_profiler_json(run, filename, worker_name):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册