Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
360b8383
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
360b8383
编写于
6月 13, 2022
作者:
C
chenjian
提交者:
GitHub
6月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add serialization for new field in event node (#43405)
* add serialization for new field in event node * fix a bug
上级
30b10630
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
307 additions
and
1 deletion
+307
-1
paddle/fluid/platform/profiler/dump/deserialization_reader.cc
...le/fluid/platform/profiler/dump/deserialization_reader.cc
+76
-0
paddle/fluid/platform/profiler/dump/deserialization_reader.h
paddle/fluid/platform/profiler/dump/deserialization_reader.h
+3
-0
paddle/fluid/platform/profiler/dump/nodetree.proto
paddle/fluid/platform/profiler/dump/nodetree.proto
+70
-0
paddle/fluid/platform/profiler/dump/serialization_logger.cc
paddle/fluid/platform/profiler/dump/serialization_logger.cc
+77
-1
paddle/fluid/platform/profiler/dump/serialization_logger.h
paddle/fluid/platform/profiler/dump/serialization_logger.h
+3
-0
paddle/fluid/platform/profiler/dump/test_serialization_logger.cc
...fluid/platform/profiler/dump/test_serialization_logger.cc
+20
-0
paddle/fluid/platform/profiler/event_python.cc
paddle/fluid/platform/profiler/event_python.cc
+26
-0
paddle/fluid/platform/profiler/event_python.h
paddle/fluid/platform/profiler/event_python.h
+32
-0
未找到文件。
paddle/fluid/platform/profiler/dump/deserialization_reader.cc
浏览文件 @
360b8383
...
...
@@ -92,6 +92,26 @@ std::unique_ptr<ProfilerResult> DeserializationReader::Parse() {
device_node
);
// insert into runtime_node
}
}
// handle mem node
for
(
int
mem_node_index
=
0
;
mem_node_index
<
host_node_proto
.
mem_nodes_size
();
mem_node_index
++
)
{
const
MemTraceEventNodeProto
&
mem_node_proto
=
host_node_proto
.
mem_nodes
(
mem_node_index
);
MemTraceEventNode
*
mem_node
=
RestoreMemTraceEventNode
(
mem_node_proto
);
host_node
->
AddMemNode
(
mem_node
);
}
// handle op supplement node
for
(
int
op_supplement_node_index
=
0
;
op_supplement_node_index
<
host_node_proto
.
op_supplement_nodes_size
();
op_supplement_node_index
++
)
{
const
OperatorSupplementEventNodeProto
&
op_supplement_node_proto
=
host_node_proto
.
op_supplement_nodes
(
op_supplement_node_index
);
OperatorSupplementEventNode
*
op_supplement_node
=
RestoreOperatorSupplementEventNode
(
op_supplement_node_proto
);
host_node
->
SetOperatorSupplementNode
(
op_supplement_node
);
}
}
// restore parent-child relationship
for
(
auto
it
=
child_parent_map
.
begin
();
it
!=
child_parent_map
.
end
();
...
...
@@ -176,6 +196,62 @@ HostTraceEventNode* DeserializationReader::RestoreHostTraceEventNode(
return
new
HostTraceEventNode
(
host_event
);
}
// Rebuilds a MemTraceEventNode from its serialized proto representation.
//
// Every field of the embedded MemTraceEventProto is copied one-to-one into a
// MemTraceEvent; the proto enum value is cast to TracerMemEventType.
// Returns a node allocated with `new`; this function does not keep a pointer
// to it.
MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode(
    const MemTraceEventNodeProto& mem_node_proto) {
  const MemTraceEventProto& proto = mem_node_proto.mem_event();

  MemTraceEvent event;
  // When and where the memory operation was recorded.
  event.timestamp_ns = proto.timestamp_ns();
  event.process_id = proto.process_id();
  event.thread_id = proto.thread_id();
  event.place = proto.place();
  // What the operation was.
  event.type = static_cast<TracerMemEventType>(proto.type());
  event.addr = proto.addr();
  event.increase_bytes = proto.increase_bytes();
  // Running totals recorded alongside the operation.
  event.current_allocated = proto.current_allocated();
  event.current_reserved = proto.current_reserved();

  return new MemTraceEventNode(event);
}
// Rebuilds an OperatorSupplementEventNode from its serialized proto form.
//
// Scalar fields (timestamp, op type, call stack, process/thread id) are
// copied straight across. Input shapes and dtypes are stored in the proto as
// parallel repeated fields -- key(i) pairs with shape_vecs(i) / dtype_vecs(i)
// -- and are reassembled here into maps keyed by that string.
//
// Returns a node allocated with `new`; this function does not keep a pointer
// to it.
OperatorSupplementEventNode*
DeserializationReader::RestoreOperatorSupplementEventNode(
    const OperatorSupplementEventNodeProto& op_supplement_node_proto) {
  const OperatorSupplementEventProto& op_supplement_event_proto =
      op_supplement_node_proto.op_supplement_event();

  OperatorSupplementEvent op_supplement_event;
  op_supplement_event.timestamp_ns = op_supplement_event_proto.timestamp_ns();
  op_supplement_event.op_type = op_supplement_event_proto.op_type();
  op_supplement_event.callstack = op_supplement_event_proto.callstack();
  op_supplement_event.process_id = op_supplement_event_proto.process_id();
  op_supplement_event.thread_id = op_supplement_event_proto.thread_id();

  // ---- input shapes: key -> list of shapes -> list of dimension sizes ----
  std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
  const auto& input_shape_proto = op_supplement_event_proto.input_shapes();
  for (int i = 0; i < input_shape_proto.key_size(); ++i) {
    // Must bind by reference: a plain `auto` would copy the mapped vector and
    // every push_back below would be lost, leaving the map entry empty.
    auto& shapes_for_key = input_shapes[input_shape_proto.key(i)];
    // key and shape_vecs are written in lock-step by the serializer; guard
    // against a file where they disagree instead of indexing out of range.
    if (i >= input_shape_proto.shape_vecs_size()) {
      continue;
    }
    const auto& shape_vectors_proto = input_shape_proto.shape_vecs(i);
    for (int j = 0; j < shape_vectors_proto.shapes_size(); ++j) {
      const auto& shape_vector_proto = shape_vectors_proto.shapes(j);
      std::vector<int64_t> shape;
      shape.reserve(shape_vector_proto.size_size());
      for (int k = 0; k < shape_vector_proto.size_size(); ++k) {
        // The proto stores sizes as uint64; the in-memory event uses int64_t.
        shape.push_back(static_cast<int64_t>(shape_vector_proto.size(k)));
      }
      shapes_for_key.push_back(shape);
    }
  }
  op_supplement_event.input_shapes = input_shapes;

  // ---- dtypes: key -> list of dtype names ----
  std::map<std::string, std::vector<std::string>> dtypes;
  const auto& dtype_proto = op_supplement_event_proto.dtypes();
  for (int i = 0; i < dtype_proto.key_size(); ++i) {
    // Reference for the same reason as above.
    auto& dtypes_for_key = dtypes[dtype_proto.key(i)];
    if (i >= dtype_proto.dtype_vecs_size()) {
      continue;
    }
    const auto& dtype_vec_proto = dtype_proto.dtype_vecs(i);
    dtypes_for_key.reserve(dtypes_for_key.size() +
                           dtype_vec_proto.dtype_size());
    for (int j = 0; j < dtype_vec_proto.dtype_size(); ++j) {
      dtypes_for_key.push_back(dtype_vec_proto.dtype(j));
    }
  }
  op_supplement_event.dtypes = dtypes;

  return new OperatorSupplementEventNode(op_supplement_event);
}
KernelEventInfo
DeserializationReader
::
HandleKernelEventInfoProto
(
const
DeviceTraceEventProto
&
device_event_proto
)
{
const
KernelEventInfoProto
&
kernel_info_proto
=
...
...
paddle/fluid/platform/profiler/dump/deserialization_reader.h
浏览文件 @
360b8383
...
...
@@ -36,6 +36,9 @@ class DeserializationReader {
KernelEventInfo
HandleKernelEventInfoProto
(
const
DeviceTraceEventProto
&
);
MemcpyEventInfo
HandleMemcpyEventInfoProto
(
const
DeviceTraceEventProto
&
);
MemsetEventInfo
HandleMemsetEventInfoProto
(
const
DeviceTraceEventProto
&
);
MemTraceEventNode
*
RestoreMemTraceEventNode
(
const
MemTraceEventNodeProto
&
);
OperatorSupplementEventNode
*
RestoreOperatorSupplementEventNode
(
const
OperatorSupplementEventNodeProto
&
);
std
::
string
filename_
;
std
::
ifstream
input_file_stream_
;
NodeTreesProto
*
node_trees_proto_
;
...
...
paddle/fluid/platform/profiler/dump/nodetree.proto
浏览文件 @
360b8383
...
...
@@ -46,6 +46,15 @@ enum TracerEventTypeProto {
PythonOp
=
13
;
// Used to mark python level userdefined
PythonUserDefined
=
14
;
// Used to mark mlu runtime record returned by cnpapi
MluRuntime
=
15
;
};
// Kind of memory operation carried by a MemTraceEventProto.
enum TracerMemEventTypeProto {
  // A memory allocation.
  Allocate = 0;
  // A memory free.
  Free = 1;
};
message
KernelEventInfoProto
{
...
...
@@ -121,6 +130,58 @@ message HostTraceEventProto {
required
uint64
thread_id
=
6
;
}
// Serialized form of a single memory trace event.
message MemTraceEventProto {
  // Timestamp of the record.
  required uint64 timestamp_ns = 1;
  // Memory manipulation type (allocate or free).
  required TracerMemEventTypeProto type = 2;
  // Memory address of the allocation or free.
  required uint64 addr = 3;
  // Process id of the record.
  required uint64 process_id = 4;
  // Thread id of the record.
  required uint64 thread_id = 5;
  // Change in bytes caused by this manipulation: positive for an allocation,
  // negative for a free.
  required int64 increase_bytes = 6;
  // Place.
  required string place = 7;
  // Current total allocated memory.
  required uint64 current_allocated = 8;
  // Current total reserved memory.
  required uint64 current_reserved = 9;
}
// Serialized form of the supplementary information recorded for an operator:
// its input shapes, input dtypes and call stack.
message OperatorSupplementEventProto {
  // Timestamp of the record.
  required uint64 timestamp_ns = 1;
  // Op type name.
  required string op_type = 2;
  // Process id of the record.
  required uint64 process_id = 3;
  // Thread id of the record.
  required uint64 thread_id = 4;

  // Input shapes, stored as two parallel lists: key[i] names the entry whose
  // shapes are held in shape_vecs[i].
  message input_shape_proto {
    repeated string key = 1;
    // All shapes recorded under one key.
    message shape_vector {
      // One shape: its dimension sizes in order.
      // NOTE(review): declared uint64 while the C++ side stores int64_t;
      // confirm whether negative dimensions need to round-trip.
      message shape {
        repeated uint64 size = 1;
      }
      repeated shape shapes = 1;
    }
    repeated shape_vector shape_vecs = 2;
  }
  required input_shape_proto input_shapes = 5;

  // Dtypes, stored the same way: key[i] names the entry whose dtype strings
  // are held in dtype_vecs[i].
  message dtype_proto {
    repeated string key = 1;
    // All dtype names recorded under one key.
    message dtype_vector {
      repeated string dtype = 1;
    }
    repeated dtype_vector dtype_vecs = 2;
  }
  required dtype_proto dtypes = 6;

  // Call stack.
  required string callstack = 7;
}
message
CudaRuntimeTraceEventProto
{
// record name
required
string
name
=
1
;
...
...
@@ -166,6 +227,12 @@ message DeviceTraceEventProto {
}
}
// Tree-node wrapper around a single OperatorSupplementEventProto.
message OperatorSupplementEventNodeProto {
  required OperatorSupplementEventProto op_supplement_event = 1;
}
// Tree-node wrapper around a single MemTraceEventProto.
message MemTraceEventNodeProto {
  required MemTraceEventProto mem_event = 1;
}
// Tree-node wrapper around a single DeviceTraceEventProto.
message DeviceTraceEventNodeProto {
  required DeviceTraceEventProto device_event = 1;
}
...
...
@@ -180,6 +247,9 @@ message HostTraceEventNodeProto {
required
int64
parentid
=
2
;
required
HostTraceEventProto
host_trace_event
=
3
;
repeated
CudaRuntimeTraceEventNodeProto
runtime_nodes
=
4
;
// below is added in version 1.0.1
repeated
MemTraceEventNodeProto
mem_nodes
=
5
;
repeated
OperatorSupplementEventNodeProto
op_supplement_nodes
=
6
;
}
message
ThreadNodeTreeProto
{
...
...
paddle/fluid/platform/profiler/dump/serialization_logger.cc
浏览文件 @
360b8383
...
...
@@ -20,7 +20,7 @@ namespace paddle {
namespace
platform
{
static
const
char
*
kDefaultFilename
=
"pid_%s_time_%s.paddle_trace.pb"
;
static
const
char
*
version
=
"1.0.
0
"
;
static
const
char
*
version
=
"1.0.
1
"
;
static
uint32_t
span_indx
=
0
;
static
std
::
string
DefaultFileName
()
{
...
...
@@ -106,10 +106,33 @@ void SerializationLogger::LogNodeTrees(const NodeTrees& node_trees) {
(
*
devicenode
)
->
LogMe
(
this
);
// fill detail information
}
}
for
(
auto
memnode
=
(
*
hostnode
)
->
GetMemTraceEventNodes
().
begin
();
memnode
!=
(
*
hostnode
)
->
GetMemTraceEventNodes
().
end
();
++
memnode
)
{
MemTraceEventNodeProto
*
mem_node_proto
=
current_host_trace_event_node_proto_
->
add_mem_nodes
();
current_mem_trace_event_node_proto_
=
mem_node_proto
;
(
*
memnode
)
->
LogMe
(
this
);
}
}
}
}
void
SerializationLogger
::
LogMemTraceEventNode
(
const
MemTraceEventNode
&
mem_node
)
{
MemTraceEventProto
*
mem_trace_event
=
new
MemTraceEventProto
();
mem_trace_event
->
set_timestamp_ns
(
mem_node
.
TimeStampNs
());
mem_trace_event
->
set_type
(
static_cast
<
TracerMemEventTypeProto
>
(
mem_node
.
Type
()));
mem_trace_event
->
set_addr
(
mem_node
.
Addr
());
mem_trace_event
->
set_process_id
(
mem_node
.
ProcessId
());
mem_trace_event
->
set_thread_id
(
mem_node
.
ThreadId
());
mem_trace_event
->
set_increase_bytes
(
mem_node
.
IncreaseBytes
());
mem_trace_event
->
set_place
(
mem_node
.
Place
());
mem_trace_event
->
set_current_allocated
(
mem_node
.
CurrentAllocated
());
mem_trace_event
->
set_current_reserved
(
mem_node
.
CurrentReserved
());
current_mem_trace_event_node_proto_
->
set_allocated_mem_event
(
mem_trace_event
);
}
void
SerializationLogger
::
LogHostTraceEventNode
(
const
HostTraceEventNode
&
host_node
)
{
HostTraceEventProto
*
host_trace_event
=
new
HostTraceEventProto
();
...
...
@@ -122,6 +145,59 @@ void SerializationLogger::LogHostTraceEventNode(
host_trace_event
->
set_thread_id
(
host_node
.
ThreadId
());
current_host_trace_event_node_proto_
->
set_allocated_host_trace_event
(
host_trace_event
);
OperatorSupplementEventNode
*
op_supplement_event_node
=
host_node
.
GetOperatorSupplementEventNode
();
if
(
op_supplement_event_node
!=
nullptr
)
{
current_op_supplement_event_node_proto_
=
current_host_trace_event_node_proto_
->
add_op_supplement_nodes
();
OperatorSupplementEventProto
*
op_supplement_event_proto
=
new
OperatorSupplementEventProto
();
op_supplement_event_proto
->
set_op_type
(
op_supplement_event_node
->
Name
());
op_supplement_event_proto
->
set_timestamp_ns
(
op_supplement_event_node
->
TimeStampNs
());
op_supplement_event_proto
->
set_process_id
(
op_supplement_event_node
->
ProcessId
());
op_supplement_event_proto
->
set_thread_id
(
op_supplement_event_node
->
ThreadId
());
op_supplement_event_proto
->
set_callstack
(
op_supplement_event_node
->
CallStack
());
OperatorSupplementEventProto
::
input_shape_proto
*
input_shape_proto
=
op_supplement_event_proto
->
mutable_input_shapes
();
for
(
auto
it
=
op_supplement_event_node
->
InputShapes
().
begin
();
it
!=
op_supplement_event_node
->
InputShapes
().
end
();
it
++
)
{
input_shape_proto
->
add_key
(
it
->
first
);
OperatorSupplementEventProto
::
input_shape_proto
::
shape_vector
*
shape_vectors_proto
=
input_shape_proto
->
add_shape_vecs
();
auto
shape_vectors
=
it
->
second
;
for
(
auto
shape_vecs_it
=
shape_vectors
.
begin
();
shape_vecs_it
!=
shape_vectors
.
end
();
shape_vecs_it
++
)
{
auto
shape_vector
=
*
shape_vecs_it
;
OperatorSupplementEventProto
::
input_shape_proto
::
shape_vector
::
shape
*
shape_proto
=
shape_vectors_proto
->
add_shapes
();
for
(
auto
shape_it
=
shape_vector
.
begin
();
shape_it
!=
shape_vector
.
end
();
shape_it
++
)
{
shape_proto
->
add_size
(
*
shape_it
);
}
}
}
OperatorSupplementEventProto
::
dtype_proto
*
dtype_proto
=
op_supplement_event_proto
->
mutable_dtypes
();
for
(
auto
it
=
op_supplement_event_node
->
Dtypes
().
begin
();
it
!=
op_supplement_event_node
->
Dtypes
().
end
();
it
++
)
{
dtype_proto
->
add_key
(
it
->
first
);
OperatorSupplementEventProto
::
dtype_proto
::
dtype_vector
*
dtype_vector_proto
=
dtype_proto
->
add_dtype_vecs
();
auto
dtype_vector
=
it
->
second
;
for
(
auto
dtype_it
=
dtype_vector
.
begin
();
dtype_it
!=
dtype_vector
.
end
();
dtype_it
++
)
{
dtype_vector_proto
->
add_dtype
(
*
dtype_it
);
}
}
current_op_supplement_event_node_proto_
->
set_allocated_op_supplement_event
(
op_supplement_event_proto
);
}
}
void
SerializationLogger
::
LogRuntimeTraceEventNode
(
...
...
paddle/fluid/platform/profiler/dump/serialization_logger.h
浏览文件 @
360b8383
...
...
@@ -34,6 +34,7 @@ class SerializationLogger : public BaseLogger {
void
LogRuntimeTraceEventNode
(
const
CudaRuntimeTraceEventNode
&
)
override
;
void
LogNodeTrees
(
const
NodeTrees
&
)
override
;
void
LogMetaInfo
(
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
);
void
LogMemTraceEventNode
(
const
MemTraceEventNode
&
)
override
;
private:
void
OpenFile
();
...
...
@@ -48,6 +49,8 @@ class SerializationLogger : public BaseLogger {
HostTraceEventNodeProto
*
current_host_trace_event_node_proto_
;
CudaRuntimeTraceEventNodeProto
*
current_runtime_trace_event_node_proto_
;
DeviceTraceEventNodeProto
*
current_device_trace_event_node_proto_
;
MemTraceEventNodeProto
*
current_mem_trace_event_node_proto_
;
OperatorSupplementEventNodeProto
*
current_op_supplement_event_node_proto_
;
};
}
// namespace platform
...
...
paddle/fluid/platform/profiler/dump/test_serialization_logger.cc
浏览文件 @
360b8383
...
...
@@ -34,6 +34,7 @@ using paddle::platform::ProfilerResult;
using
paddle
::
platform
::
RuntimeTraceEvent
;
using
paddle
::
platform
::
SerializationLogger
;
using
paddle
::
platform
::
TracerEventType
;
using
paddle
::
platform
::
TracerMemEventType
;
TEST
(
SerializationLoggerTest
,
dump_case0
)
{
std
::
list
<
HostTraceEvent
>
host_events
;
...
...
@@ -50,6 +51,19 @@ TEST(SerializationLoggerTest, dump_case0) {
std
::
string
(
"op2"
),
TracerEventType
::
Operator
,
21000
,
30000
,
10
,
10
));
host_events
.
push_back
(
HostTraceEvent
(
std
::
string
(
"op3"
),
TracerEventType
::
Operator
,
31000
,
40000
,
10
,
11
));
mem_events
.
push_back
(
MemTraceEvent
(
11500
,
0x1000
,
TracerMemEventType
::
Allocate
,
10
,
10
,
50
,
"GPU:0"
,
50
,
50
));
mem_events
.
push_back
(
MemTraceEvent
(
11900
,
0x1000
,
TracerMemEventType
::
Free
,
10
,
10
,
-
50
,
"GPU:0"
,
0
,
50
));
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int64_t
>>>
input_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
dtypes
;
input_shapes
[
std
::
string
(
"X"
)].
push_back
(
std
::
vector
<
int64_t
>
{
1
,
2
,
3
});
input_shapes
[
std
::
string
(
"X"
)].
push_back
(
std
::
vector
<
int64_t
>
{
4
,
5
,
6
,
7
});
dtypes
[
std
::
string
(
"X"
)].
push_back
(
std
::
string
(
"int8"
));
dtypes
[
std
::
string
(
"X"
)].
push_back
(
std
::
string
(
"float32"
));
op_supplement_events
.
push_back
(
OperatorSupplementEvent
(
11600
,
"op1"
,
input_shapes
,
dtypes
,
"op1()"
,
10
,
10
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch1"
),
15000
,
17000
,
10
,
10
,
1
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch2"
),
25000
,
...
...
@@ -91,6 +105,8 @@ TEST(SerializationLoggerTest, dump_case0) {
if
((
*
it
)
->
Name
()
==
"op1"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
EXPECT_EQ
((
*
it
)
->
GetMemTraceEventNodes
().
size
(),
2u
);
EXPECT_NE
((
*
it
)
->
GetOperatorSupplementEventNode
(),
nullptr
);
}
}
for
(
auto
it
=
thread2_nodes
.
begin
();
it
!=
thread2_nodes
.
end
();
it
++
)
{
...
...
@@ -100,6 +116,7 @@ TEST(SerializationLoggerTest, dump_case0) {
}
}
tree
.
LogMe
(
&
logger
);
logger
.
LogMetaInfo
(
std
::
unordered_map
<
std
::
string
,
std
::
string
>
());
}
TEST
(
SerializationLoggerTest
,
dump_case1
)
{
...
...
@@ -154,6 +171,7 @@ TEST(SerializationLoggerTest, dump_case1) {
}
}
tree
.
LogMe
(
&
logger
);
logger
.
LogMetaInfo
(
std
::
unordered_map
<
std
::
string
,
std
::
string
>
());
}
TEST
(
DeserializationReaderTest
,
restore_case0
)
{
...
...
@@ -173,6 +191,8 @@ TEST(DeserializationReaderTest, restore_case0) {
if
((
*
it
)
->
Name
()
==
"op1"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
EXPECT_EQ
((
*
it
)
->
GetMemTraceEventNodes
().
size
(),
2u
);
EXPECT_NE
((
*
it
)
->
GetOperatorSupplementEventNode
(),
nullptr
);
}
}
for
(
auto
it
=
thread2_nodes
.
begin
();
it
!=
thread2_nodes
.
end
();
it
++
)
{
...
...
paddle/fluid/platform/profiler/event_python.cc
浏览文件 @
360b8383
...
...
@@ -32,6 +32,9 @@ HostPythonNode::~HostPythonNode() {
for
(
auto
it
=
device_node_ptrs
.
begin
();
it
!=
device_node_ptrs
.
end
();
++
it
)
{
delete
*
it
;
}
for
(
auto
it
=
mem_node_ptrs
.
begin
();
it
!=
mem_node_ptrs
.
end
();
++
it
)
{
delete
*
it
;
}
}
HostPythonNode
*
ProfilerResult
::
CopyTree
(
HostTraceEventNode
*
root
)
{
...
...
@@ -77,6 +80,29 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) {
runtime_python_node
->
device_node_ptrs
.
push_back
(
device_python_node
);
}
}
// copy MemTraceEventNode
for
(
auto
memnode
=
root
->
GetMemTraceEventNodes
().
begin
();
memnode
!=
root
->
GetMemTraceEventNodes
().
end
();
memnode
++
)
{
MemPythonNode
*
mem_python_node
=
new
MemPythonNode
();
mem_python_node
->
timestamp_ns
=
(
*
memnode
)
->
TimeStampNs
();
mem_python_node
->
addr
=
(
*
memnode
)
->
Addr
();
mem_python_node
->
type
=
(
*
memnode
)
->
Type
();
mem_python_node
->
process_id
=
(
*
memnode
)
->
ProcessId
();
mem_python_node
->
thread_id
=
(
*
memnode
)
->
ThreadId
();
mem_python_node
->
increase_bytes
=
(
*
memnode
)
->
IncreaseBytes
();
mem_python_node
->
place
=
(
*
memnode
)
->
Place
();
mem_python_node
->
current_allocated
=
(
*
memnode
)
->
CurrentAllocated
();
mem_python_node
->
current_reserved
=
(
*
memnode
)
->
CurrentReserved
();
host_python_node
->
mem_node_ptrs
.
push_back
(
mem_python_node
);
}
// copy OperatorSupplementEventNode's information if exists
OperatorSupplementEventNode
*
op_supplement_node
=
root
->
GetOperatorSupplementEventNode
();
if
(
op_supplement_node
!=
nullptr
)
{
host_python_node
->
input_shapes
=
op_supplement_node
->
InputShapes
();
host_python_node
->
dtypes
=
op_supplement_node
->
Dtypes
();
host_python_node
->
callstack
=
op_supplement_node
->
CallStack
();
}
return
host_python_node
;
}
...
...
paddle/fluid/platform/profiler/event_python.h
浏览文件 @
360b8383
...
...
@@ -43,6 +43,31 @@ struct DevicePythonNode {
uint64_t
stream_id
;
};
// Plain data record describing one memory trace event.
//
// Every scalar member carries a default initializer so that a
// default-constructed node never exposes indeterminate values, whichever
// form of initialization the caller uses.
struct MemPythonNode {
  MemPythonNode() = default;
  ~MemPythonNode() {}

  // timestamp of the record
  uint64_t timestamp_ns = 0;
  // memory addr of allocation or free
  uint64_t addr = 0;
  // memory manipulation type
  TracerMemEventType type = TracerMemEventType::Allocate;
  // process id of the record
  uint64_t process_id = 0;
  // thread id of the record
  uint64_t thread_id = 0;
  // increase bytes after this manipulation, allocation for sign +, free for
  // sign -
  int64_t increase_bytes = 0;
  // place
  std::string place;
  // current total allocated memory
  uint64_t current_allocated = 0;
  // current total reserved memory
  uint64_t current_reserved = 0;
};
struct
HostPythonNode
{
HostPythonNode
()
=
default
;
~
HostPythonNode
();
...
...
@@ -58,12 +83,19 @@ struct HostPythonNode {
uint64_t
process_id
;
// thread id of the record
uint64_t
thread_id
;
// input shapes
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int64_t
>>>
input_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
dtypes
;
// call stack
std
::
string
callstack
;
// children node
std
::
vector
<
HostPythonNode
*>
children_node_ptrs
;
// runtime node
std
::
vector
<
HostPythonNode
*>
runtime_node_ptrs
;
// device node
std
::
vector
<
DevicePythonNode
*>
device_node_ptrs
;
// mem node
std
::
vector
<
MemPythonNode
*>
mem_node_ptrs
;
};
class
ProfilerResult
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录