未验证 提交 30846bc9 编写于 作者: C chenjian 提交者: GitHub

fix op filter rule (#44063)

上级 faaa95ca
...@@ -79,19 +79,14 @@ class HostStatisticNode: ...@@ -79,19 +79,14 @@ class HostStatisticNode:
self.self_gpu_time = 0 self.self_gpu_time = 0
self.general_gpu_time = 0 # besides kernel, include time of gpu events like memcpy and memset self.general_gpu_time = 0 # besides kernel, include time of gpu events like memcpy and memset
self.self_general_gpu_time = 0 self.self_general_gpu_time = 0
self.is_terminal_operator_node = True
def cal_statistic(self): def cal_statistic(self):
for child in self.children_node: for child in self.children_node:
child.cal_statistic() child.cal_statistic()
if child.is_terminal_operator_node == False:
self.is_terminal_operator_node = False
for rt in self.runtime_node: for rt in self.runtime_node:
rt.cal_statistic() rt.cal_statistic()
self.cpu_time = self.hostnode.end_ns - self.hostnode.start_ns self.cpu_time = self.hostnode.end_ns - self.hostnode.start_ns
for child in self.children_node: for child in self.children_node:
if child.type == TracerEventType.Operator:
self.is_terminal_operator_node = False
self.gpu_time += child.gpu_time self.gpu_time += child.gpu_time
self.general_gpu_time += child.general_gpu_time self.general_gpu_time += child.general_gpu_time
self.self_cpu_time -= (child.end_ns - child.start_ns) self.self_cpu_time -= (child.end_ns - child.start_ns)
...@@ -421,10 +416,11 @@ class EventSummary: ...@@ -421,10 +416,11 @@ class EventSummary:
self.add_gpu_time(node.gpu_time) self.add_gpu_time(node.gpu_time)
self.add_general_gpu_time(node.general_gpu_time) self.add_general_gpu_time(node.general_gpu_time)
for child in node.children_node: for child in node.children_node:
if child.name not in self.operator_inners: if child.type != TracerEventType.Operator:
self.operator_inners[ if child.name not in self.operator_inners:
child.name] = EventSummary.OperatorItem(child.name) self.operator_inners[
self.operator_inners[child.name].add_item(child) child.name] = EventSummary.OperatorItem(child.name)
self.operator_inners[child.name].add_item(child)
for runtimenode in node.runtime_node: for runtimenode in node.runtime_node:
for devicenode in runtimenode.device_node: for devicenode in runtimenode.device_node:
...@@ -537,8 +533,6 @@ class EventSummary: ...@@ -537,8 +533,6 @@ class EventSummary:
deque.append(child) deque.append(child)
def add_operator_item(self, operator_node): def add_operator_item(self, operator_node):
if operator_node.is_terminal_operator_node == False:
return
if operator_node.name not in self.items: if operator_node.name not in self.items:
self.items[operator_node.name] = EventSummary.OperatorItem( self.items[operator_node.name] = EventSummary.OperatorItem(
operator_node.name) operator_node.name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册