Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
40d2b7c6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
40d2b7c6
编写于
2月 16, 2022
作者:
C
chenjian
提交者:
GitHub
2月 16, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add profiler tree dump code (#39519)
* add profiler tree dump code * add CMakeLists item * reduce some contents for pr
上级
3581c075
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
983 addition
and
0 deletion
+983
-0
paddle/fluid/platform/profiler/CMakeLists.txt
paddle/fluid/platform/profiler/CMakeLists.txt
+1
-0
paddle/fluid/platform/profiler/dump/CMakeLists.txt
paddle/fluid/platform/profiler/dump/CMakeLists.txt
+4
-0
paddle/fluid/platform/profiler/dump/deserialization_reader.cc
...le/fluid/platform/profiler/dump/deserialization_reader.cc
+218
-0
paddle/fluid/platform/profiler/dump/deserialization_reader.h
paddle/fluid/platform/profiler/dump/deserialization_reader.h
+45
-0
paddle/fluid/platform/profiler/dump/nodetree.proto
paddle/fluid/platform/profiler/dump/nodetree.proto
+181
-0
paddle/fluid/platform/profiler/dump/serialization_logger.cc
paddle/fluid/platform/profiler/dump/serialization_logger.cc
+265
-0
paddle/fluid/platform/profiler/dump/serialization_logger.h
paddle/fluid/platform/profiler/dump/serialization_logger.h
+49
-0
paddle/fluid/platform/profiler/dump/test_serialization_logger.cc
...fluid/platform/profiler/dump/test_serialization_logger.cc
+174
-0
paddle/fluid/platform/profiler/utils.h
paddle/fluid/platform/profiler/utils.h
+46
-0
未找到文件。
paddle/fluid/platform/profiler/CMakeLists.txt
浏览文件 @
40d2b7c6
...
...
@@ -5,3 +5,4 @@ cc_library(event_node SRCS event_node.cc DEPS enforce)
cc_library
(
chrometracinglogger SRCS chrometracing_logger.cc DEPS event_node
)
cc_test
(
test_event_node SRCS test_event_node.cc DEPS event_node chrometracinglogger
)
cc_test
(
new_profiler_test SRCS profiler_test.cc DEPS new_profiler event_node
)
add_subdirectory
(
dump
)
paddle/fluid/platform/profiler/dump/CMakeLists.txt
0 → 100644
浏览文件 @
40d2b7c6
proto_library
(
nodetreeproto SRCS nodetree.proto
)
cc_library
(
serialization_logger SRCS serialization_logger.cc DEPS nodetreeproto event_node
)
cc_library
(
deserialization_reader SRCS deserialization_reader.cc DEPS nodetreeproto event_node
)
cc_test
(
test_serialization_logger SRCS test_serialization_logger.cc DEPS serialization_logger deserialization_reader event_node
)
paddle/fluid/platform/profiler/dump/deserialization_reader.cc
0 → 100644
浏览文件 @
40d2b7c6
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include <cstring>
namespace
paddle
{
namespace
platform
{
DeserializationReader
::
DeserializationReader
(
const
std
::
string
&
filename
)
:
filename_
(
filename
)
{
OpenFile
();
node_trees_proto_
=
new
NodeTreesProto
();
}
DeserializationReader
::
DeserializationReader
(
const
char
*
filename
)
:
filename_
(
filename
)
{
OpenFile
();
node_trees_proto_
=
new
NodeTreesProto
();
}
void
DeserializationReader
::
OpenFile
()
{
input_file_stream_
.
open
(
filename_
,
std
::
ifstream
::
in
|
std
::
ifstream
::
binary
);
if
(
!
input_file_stream_
)
{
VLOG
(
2
)
<<
"Unable to open file for writing profiling data."
<<
std
::
endl
;
}
else
{
VLOG
(
0
)
<<
"Read profiling data from "
<<
filename_
<<
std
::
endl
;
}
}
std
::
unique_ptr
<
NodeTrees
>
DeserializationReader
::
Parse
()
{
if
(
!
node_trees_proto_
->
ParseFromIstream
(
&
input_file_stream_
))
{
VLOG
(
2
)
<<
"Unable to load node trees in protobuf."
<<
std
::
endl
;
return
nullptr
;
}
std
::
map
<
uint64_t
,
HostTraceEventNode
*>
thread_event_trees_map
;
for
(
int
node_tree_index
=
0
;
node_tree_index
<
node_trees_proto_
->
thread_trees_size
();
node_tree_index
++
)
{
// handle one thread tree
std
::
map
<
int64_t
,
HostTraceEventNode
*>
index_node_map
;
std
::
map
<
int64_t
,
int64_t
>
child_parent_map
;
const
ThreadNodeTreeProto
&
thread_node_tree_proto
=
node_trees_proto_
->
thread_trees
(
node_tree_index
);
uint64_t
current_threadid
=
thread_node_tree_proto
.
thread_id
();
for
(
int
host_node_index
=
0
;
host_node_index
<
thread_node_tree_proto
.
host_nodes_size
();
host_node_index
++
)
{
// handle host node
const
HostTraceEventNodeProto
&
host_node_proto
=
thread_node_tree_proto
.
host_nodes
(
host_node_index
);
HostTraceEventNode
*
host_node
=
RestoreHostTraceEventNode
(
host_node_proto
);
index_node_map
[
host_node_proto
.
id
()]
=
host_node
;
child_parent_map
[
host_node_proto
.
id
()]
=
host_node_proto
.
parentid
();
// handle runtime node
for
(
int
runtime_node_index
=
0
;
runtime_node_index
<
host_node_proto
.
runtime_nodes_size
();
runtime_node_index
++
)
{
const
CudaRuntimeTraceEventNodeProto
&
runtime_node_proto
=
host_node_proto
.
runtime_nodes
(
runtime_node_index
);
CudaRuntimeTraceEventNode
*
runtime_node
=
RestoreCudaRuntimeTraceEventNode
(
runtime_node_proto
);
host_node
->
AddCudaRuntimeNode
(
runtime_node
);
// insert into host_node
// handle device node
for
(
int
device_node_index
=
0
;
device_node_index
<
runtime_node_proto
.
device_nodes_size
();
device_node_index
++
)
{
const
DeviceTraceEventNodeProto
&
device_node_proto
=
runtime_node_proto
.
device_nodes
(
device_node_index
);
DeviceTraceEventNode
*
device_node
=
RestoreDeviceTraceEventNode
(
device_node_proto
);
runtime_node
->
AddDeviceTraceEventNode
(
device_node
);
// insert into runtime_node
}
}
}
// restore parent-child relationship
for
(
auto
it
=
child_parent_map
.
begin
();
it
!=
child_parent_map
.
end
();
it
++
)
{
if
(
it
->
second
!=
-
1
)
{
// not root node
index_node_map
[
it
->
second
]
->
AddChild
(
index_node_map
[
it
->
first
]);
}
else
{
thread_event_trees_map
[
current_threadid
]
=
index_node_map
[
it
->
first
];
// root node
}
}
}
// restore NodeTrees object
return
std
::
unique_ptr
<
NodeTrees
>
(
new
NodeTrees
(
thread_event_trees_map
));
}
DeserializationReader
::~
DeserializationReader
()
{
delete
node_trees_proto_
;
input_file_stream_
.
close
();
}
DeviceTraceEventNode
*
DeserializationReader
::
RestoreDeviceTraceEventNode
(
const
DeviceTraceEventNodeProto
&
device_node_proto
)
{
const
DeviceTraceEventProto
&
device_event_proto
=
device_node_proto
.
device_event
();
DeviceTraceEvent
device_event
;
device_event
.
name
=
device_event_proto
.
name
();
device_event
.
type
=
static_cast
<
TracerEventType
>
(
device_event_proto
.
type
());
device_event
.
start_ns
=
device_event_proto
.
start_ns
();
device_event
.
end_ns
=
device_event_proto
.
end_ns
();
device_event
.
device_id
=
device_event_proto
.
device_id
();
device_event
.
context_id
=
device_event_proto
.
context_id
();
device_event
.
stream_id
=
device_event_proto
.
stream_id
();
device_event
.
correlation_id
=
device_event_proto
.
correlation_id
();
switch
(
device_event
.
type
)
{
case
TracerEventType
::
Kernel
:
device_event
.
kernel_info
=
HandleKernelEventInfoProto
(
device_event_proto
);
break
;
case
TracerEventType
::
Memcpy
:
device_event
.
memcpy_info
=
HandleMemcpyEventInfoProto
(
device_event_proto
);
break
;
case
TracerEventType
::
Memset
:
device_event
.
memset_info
=
HandleMemsetEventInfoProto
(
device_event_proto
);
break
;
default:
break
;
}
return
new
DeviceTraceEventNode
(
device_event
);
}
CudaRuntimeTraceEventNode
*
DeserializationReader
::
RestoreCudaRuntimeTraceEventNode
(
const
CudaRuntimeTraceEventNodeProto
&
runtime_node_proto
)
{
const
CudaRuntimeTraceEventProto
&
runtime_event_proto
=
runtime_node_proto
.
runtime_trace_event
();
RuntimeTraceEvent
runtime_event
;
runtime_event
.
name
=
runtime_event_proto
.
name
();
runtime_event
.
start_ns
=
runtime_event_proto
.
start_ns
();
runtime_event
.
end_ns
=
runtime_event_proto
.
end_ns
();
runtime_event
.
process_id
=
runtime_event_proto
.
process_id
();
runtime_event
.
thread_id
=
runtime_event_proto
.
thread_id
();
runtime_event
.
correlation_id
=
runtime_event_proto
.
correlation_id
();
runtime_event
.
callback_id
=
runtime_event_proto
.
callback_id
();
return
new
CudaRuntimeTraceEventNode
(
runtime_event
);
}
HostTraceEventNode
*
DeserializationReader
::
RestoreHostTraceEventNode
(
const
HostTraceEventNodeProto
&
host_node_proto
)
{
const
HostTraceEventProto
&
host_event_proto
=
host_node_proto
.
host_trace_event
();
HostTraceEvent
host_event
;
host_event
.
name
=
host_event_proto
.
name
();
host_event
.
type
=
static_cast
<
TracerEventType
>
(
host_event_proto
.
type
());
host_event
.
start_ns
=
host_event_proto
.
start_ns
();
host_event
.
end_ns
=
host_event_proto
.
end_ns
();
host_event
.
process_id
=
host_event_proto
.
process_id
();
host_event
.
thread_id
=
host_event_proto
.
thread_id
();
return
new
HostTraceEventNode
(
host_event
);
}
KernelEventInfo
DeserializationReader
::
HandleKernelEventInfoProto
(
const
DeviceTraceEventProto
&
device_event_proto
)
{
const
KernelEventInfoProto
&
kernel_info_proto
=
device_event_proto
.
kernel_info
();
KernelEventInfo
kernel_info
;
kernel_info
.
block_x
=
kernel_info_proto
.
block_x
();
kernel_info
.
block_y
=
kernel_info_proto
.
block_y
();
kernel_info
.
block_z
=
kernel_info_proto
.
block_z
();
kernel_info
.
grid_x
=
kernel_info_proto
.
grid_x
();
kernel_info
.
grid_y
=
kernel_info_proto
.
grid_y
();
kernel_info
.
grid_z
=
kernel_info_proto
.
grid_z
();
kernel_info
.
dynamic_shared_memory
=
kernel_info_proto
.
dynamic_shared_memory
();
kernel_info
.
static_shared_memory
=
kernel_info_proto
.
static_shared_memory
();
kernel_info
.
registers_per_thread
=
kernel_info_proto
.
registers_per_thread
();
kernel_info
.
local_memory_per_thread
=
kernel_info_proto
.
local_memory_per_thread
();
kernel_info
.
local_memory_total
=
kernel_info_proto
.
local_memory_total
();
kernel_info
.
queued
=
kernel_info_proto
.
queued
();
kernel_info
.
submitted
=
kernel_info_proto
.
submitted
();
kernel_info
.
completed
=
kernel_info_proto
.
completed
();
return
kernel_info
;
}
MemcpyEventInfo
DeserializationReader
::
HandleMemcpyEventInfoProto
(
const
DeviceTraceEventProto
&
device_event_proto
)
{
const
MemcpyEventInfoProto
&
memcpy_info_proto
=
device_event_proto
.
memcpy_info
();
MemcpyEventInfo
memcpy_info
;
memcpy_info
.
num_bytes
=
memcpy_info_proto
.
num_bytes
();
std
::
strncpy
(
memcpy_info
.
copy_kind
,
memcpy_info_proto
.
copy_kind
().
c_str
(),
kMemKindMaxLen
-
1
);
std
::
strncpy
(
memcpy_info
.
src_kind
,
memcpy_info_proto
.
src_kind
().
c_str
(),
kMemKindMaxLen
-
1
);
std
::
strncpy
(
memcpy_info
.
dst_kind
,
memcpy_info_proto
.
dst_kind
().
c_str
(),
kMemKindMaxLen
-
1
);
return
memcpy_info
;
}
MemsetEventInfo
DeserializationReader
::
HandleMemsetEventInfoProto
(
const
DeviceTraceEventProto
&
device_event_proto
)
{
const
MemsetEventInfoProto
&
memset_info_proto
=
device_event_proto
.
memset_info
();
MemsetEventInfo
memset_info
;
memset_info
.
num_bytes
=
memset_info_proto
.
num_bytes
();
std
::
strncpy
(
memset_info
.
memory_kind
,
memset_info_proto
.
memory_kind
().
c_str
(),
kMemKindMaxLen
-
1
);
memset_info
.
value
=
memset_info_proto
.
value
();
return
memset_info
;
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler/dump/deserialization_reader.h
0 → 100644
浏览文件 @
40d2b7c6
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include "paddle/fluid/platform/profiler/dump/nodetree.pb.h"
#include "paddle/fluid/platform/profiler/event_node.h"
namespace
paddle
{
namespace
platform
{
class
DeserializationReader
{
public:
explicit
DeserializationReader
(
const
std
::
string
&
filename
);
explicit
DeserializationReader
(
const
char
*
filename
);
~
DeserializationReader
();
std
::
unique_ptr
<
NodeTrees
>
Parse
();
private:
void
OpenFile
();
DeviceTraceEventNode
*
RestoreDeviceTraceEventNode
(
const
DeviceTraceEventNodeProto
&
);
CudaRuntimeTraceEventNode
*
RestoreCudaRuntimeTraceEventNode
(
const
CudaRuntimeTraceEventNodeProto
&
);
HostTraceEventNode
*
RestoreHostTraceEventNode
(
const
HostTraceEventNodeProto
&
);
KernelEventInfo
HandleKernelEventInfoProto
(
const
DeviceTraceEventProto
&
);
MemcpyEventInfo
HandleMemcpyEventInfoProto
(
const
DeviceTraceEventProto
&
);
MemsetEventInfo
HandleMemsetEventInfoProto
(
const
DeviceTraceEventProto
&
);
std
::
string
filename_
;
std
::
ifstream
input_file_stream_
;
NodeTreesProto
*
node_trees_proto_
;
};
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler/dump/nodetree.proto
0 → 100644
浏览文件 @
40d2b7c6
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax
=
"proto2"
;
package
paddle
.
platform
;
enum
TracerEventTypeProto
{
// Used to mark operator record
Operator
=
0
;
// Used to mark dataloader record
Dataloader
=
1
;
// Used to mark profile step record
ProfileStep
=
2
;
// Used to mark cuda runtime record returned by cupti
CudaRuntime
=
3
;
// Used to mark kernel computation record returned by cupti
Kernel
=
4
;
// Used to mark memcpy record returned by cupti
Memcpy
=
5
;
// Used to mark memset record returned by cupti
Memset
=
6
;
// Used to mark record defined by user
UserDefined
=
7
;
// A flag to denote the number of current types
NumTypes
=
8
;
}
message
KernelEventInfoProto
{
// The X-dimension block size for the kernel.
required
uint32
block_x
=
1
;
// The Y-dimension block size for the kernel.
required
uint32
block_y
=
2
;
// The Z-dimension grid size for the kernel.
required
uint32
block_z
=
3
;
// X-dimension of a grid.
required
uint32
grid_x
=
4
;
// Y-dimension of a grid.
required
uint32
grid_y
=
5
;
// Z-dimension of a grid.
required
uint32
grid_z
=
6
;
// The dynamic shared memory reserved for the kernel, in bytes.
required
uint32
dynamic_shared_memory
=
7
;
// The static shared memory allocated for the kernel, in bytes.
required
uint32
static_shared_memory
=
8
;
// The number of registers required for each thread executing the kernel.
required
uint32
registers_per_thread
=
9
;
// The amount of local memory reserved for each thread, in bytes.
required
uint32
local_memory_per_thread
=
10
;
// The total amount of local memory reserved for the kernel, in bytes.
required
uint32
local_memory_total
=
11
;
// The timestamp when the kernel is queued up in the command buffer, in ns.
// This timestamp is not collected by default. Use API
// cuptiActivityEnableLatencyTimestamps() to enable collection.
required
uint64
queued
=
12
;
// The timestamp when the command buffer containing the kernel launch is
// submitted to the GPU, in ns.
// This timestamp is not collected by default. Use API
// cuptiActivityEnableLatencyTimestamps() to enable collection.
required
uint64
submitted
=
13
;
// The completed timestamp for the kernel execution, in ns.
required
uint64
completed
=
14
;
}
message
MemcpyEventInfoProto
{
// The number of bytes transferred by the memory copy.
required
uint64
num_bytes
=
1
;
// The kind of the memory copy.
// Each kind represents the source and destination targets of a memory copy.
// Targets are host, device, and array. Refer to CUpti_ActivityMemcpyKind
required
string
copy_kind
=
2
;
// The source memory kind read by the memory copy.
// Each kind represents the type of the memory accessed by a memory
// operation/copy. Refer to CUpti_ActivityMemoryKind
required
string
src_kind
=
3
;
// The destination memory kind read by the memory copy.
required
string
dst_kind
=
4
;
}
message
MemsetEventInfoProto
{
// The number of bytes being set by the memory set.
required
uint64
num_bytes
=
1
;
// The memory kind of the memory set. Refer to CUpti_ActivityMemoryKind
required
string
memory_kind
=
2
;
// the value being assigned to memory by the memory set.
required
uint32
value
=
3
;
}
message
HostTraceEventProto
{
required
string
name
=
1
;
required
TracerEventTypeProto
type
=
2
;
// start timestamp of the record
required
uint64
start_ns
=
3
;
// end timestamp of the record
required
uint64
end_ns
=
4
;
// process id of the record
required
uint64
process_id
=
5
;
// thread id of the record
required
uint64
thread_id
=
6
;
}
message
CudaRuntimeTraceEventProto
{
// record name
required
string
name
=
1
;
// start timestamp of the record
required
uint64
start_ns
=
2
;
// end timestamp of the record
required
uint64
end_ns
=
3
;
// process id of the record
required
uint64
process_id
=
4
;
// thread id of the record
required
uint64
thread_id
=
5
;
// correlation id, used for correlating async activities happened on device
required
uint32
correlation_id
=
6
;
// callback id, used to identify which cuda runtime api is called
required
uint32
callback_id
=
7
;
}
message
DeviceTraceEventProto
{
// record name
required
string
name
=
1
;
// record type, one of TracerEventType
required
TracerEventTypeProto
type
=
2
;
// start timestamp of the record
required
uint64
start_ns
=
3
;
// end timestamp of the record
required
uint64
end_ns
=
4
;
// device id
required
uint64
device_id
=
5
;
// context id
required
uint64
context_id
=
6
;
// stream id
required
uint64
stream_id
=
7
;
// correlation id, used for correlating async activities happened on device
required
uint32
correlation_id
=
8
;
// union, specific device record type has different detail information
oneof
detail_info
{
// used for TracerEventType::Kernel
KernelEventInfoProto
kernel_info
=
9
;
// used for TracerEventType::Memcpy
MemcpyEventInfoProto
memcpy_info
=
10
;
// used for TracerEventType::Memset
MemsetEventInfoProto
memset_info
=
11
;
}
}
message
DeviceTraceEventNodeProto
{
required
DeviceTraceEventProto
device_event
=
1
;
}
message
CudaRuntimeTraceEventNodeProto
{
required
CudaRuntimeTraceEventProto
runtime_trace_event
=
1
;
repeated
DeviceTraceEventNodeProto
device_nodes
=
2
;
}
message
HostTraceEventNodeProto
{
required
int64
id
=
1
;
required
int64
parentid
=
2
;
required
HostTraceEventProto
host_trace_event
=
3
;
repeated
CudaRuntimeTraceEventNodeProto
runtime_nodes
=
4
;
}
message
ThreadNodeTreeProto
{
required
uint64
thread_id
=
1
;
repeated
HostTraceEventNodeProto
host_nodes
=
2
;
}
message
NodeTreesProto
{
required
string
version
=
1
;
repeated
ThreadNodeTreeProto
thread_trees
=
2
;
}
paddle/fluid/platform/profiler/dump/serialization_logger.cc
0 → 100644
浏览文件 @
40d2b7c6
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "glog/logging.h"
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/utils.h"
namespace
paddle
{
namespace
platform
{
static
const
char
*
kDefaultFilename
=
"pid_%s_time_%s.paddle_trace.pb"
;
static
const
char
*
version
=
"1.0.0"
;
static
std
::
string
DefaultFileName
()
{
auto
pid
=
GetProcessId
();
return
string_format
(
std
::
string
(
kDefaultFilename
),
pid
,
GetStringFormatLocalTime
().
c_str
());
}
void
SerializationLogger
::
OpenFile
()
{
output_file_stream_
.
open
(
filename_
,
std
::
ofstream
::
out
|
std
::
ofstream
::
trunc
|
std
::
ofstream
::
binary
);
if
(
!
output_file_stream_
)
{
LOG
(
WARNING
)
<<
"Unable to open file for writing profiling data."
<<
std
::
endl
;
}
else
{
LOG
(
INFO
)
<<
"writing profiling data to "
<<
filename_
<<
std
::
endl
;
}
node_trees_proto_
=
new
NodeTreesProto
();
node_trees_proto_
->
set_version
(
std
::
string
(
version
));
}
void
SerializationLogger
::
LogNodeTrees
(
const
NodeTrees
&
node_trees
)
{
// dump the whole tree into file
const
std
::
map
<
uint64_t
,
std
::
vector
<
HostTraceEventNode
*>>
thread2host_event_nodes
=
node_trees
.
Traverse
(
true
);
for
(
auto
it
=
thread2host_event_nodes
.
begin
();
it
!=
thread2host_event_nodes
.
end
();
++
it
)
{
// 1. order every node an index, every node a parent
std
::
map
<
HostTraceEventNode
*
,
int64_t
>
node_index_map
;
std
::
map
<
HostTraceEventNode
*
,
int64_t
>
node_parent_map
;
int64_t
index
=
0
;
for
(
auto
hostnode
=
it
->
second
.
begin
();
hostnode
!=
it
->
second
.
end
();
++
hostnode
)
{
node_index_map
[(
*
hostnode
)]
=
index
;
// order each node
index
++
;
}
node_parent_map
[(
*
(
it
->
second
.
begin
()))]
=
-
1
;
// root's parent set as -1
for
(
auto
hostnode
=
it
->
second
.
begin
();
hostnode
!=
it
->
second
.
end
();
++
hostnode
)
{
for
(
auto
childnode
=
(
*
hostnode
)
->
GetChildren
().
begin
();
childnode
!=
(
*
hostnode
)
->
GetChildren
().
end
();
++
childnode
)
{
node_parent_map
[(
*
childnode
)]
=
node_index_map
[(
*
hostnode
)];
// mark each node's parent
}
}
// 2. serialize host node, runtime node and device node
current_thread_node_tree_proto_
=
node_trees_proto_
->
add_thread_trees
();
// add ThreadNodeTreeProto
current_thread_node_tree_proto_
->
set_thread_id
(
it
->
first
);
for
(
auto
hostnode
=
it
->
second
.
begin
();
hostnode
!=
it
->
second
.
end
();
++
hostnode
)
{
HostTraceEventNodeProto
*
host_node_proto
=
current_thread_node_tree_proto_
->
add_host_nodes
();
// add HostTraceEventNodeProto
host_node_proto
->
set_id
(
node_index_map
[(
*
hostnode
)]);
host_node_proto
->
set_parentid
(
node_parent_map
[(
*
hostnode
)]);
current_host_trace_event_node_proto_
=
host_node_proto
;
// set current HostTraceEventNodeProto
(
*
hostnode
)
->
LogMe
(
this
);
// fill detail information
for
(
auto
runtimenode
=
(
*
hostnode
)
->
GetRuntimeTraceEventNodes
().
begin
();
runtimenode
!=
(
*
hostnode
)
->
GetRuntimeTraceEventNodes
().
end
();
++
runtimenode
)
{
CudaRuntimeTraceEventNodeProto
*
runtime_node_proto
=
current_host_trace_event_node_proto_
->
add_runtime_nodes
();
// add CudaRuntimeTraceEventNodeProto
current_runtime_trace_event_node_proto_
=
runtime_node_proto
;
// set current CudaRuntimeTraceEventNodeProto
(
*
runtimenode
)
->
LogMe
(
this
);
// fill detail information
for
(
auto
devicenode
=
(
*
runtimenode
)
->
GetDeviceTraceEventNodes
().
begin
();
devicenode
!=
(
*
runtimenode
)
->
GetDeviceTraceEventNodes
().
end
();
++
devicenode
)
{
DeviceTraceEventNodeProto
*
device_node_proto
=
current_runtime_trace_event_node_proto_
->
add_device_nodes
();
// add DeviceTraceEventNodeProto
current_device_trace_event_node_proto_
=
device_node_proto
;
// set current DeviceTraceEventNodeProto
(
*
devicenode
)
->
LogMe
(
this
);
// fill detail information
}
}
}
}
}
void
SerializationLogger
::
LogHostTraceEventNode
(
const
HostTraceEventNode
&
host_node
)
{
HostTraceEventProto
*
host_trace_event
=
new
HostTraceEventProto
();
host_trace_event
->
set_name
(
host_node
.
Name
());
host_trace_event
->
set_type
(
static_cast
<
TracerEventTypeProto
>
(
host_node
.
Type
()));
host_trace_event
->
set_start_ns
(
host_node
.
StartNs
());
host_trace_event
->
set_end_ns
(
host_node
.
EndNs
());
host_trace_event
->
set_process_id
(
host_node
.
ProcessId
());
host_trace_event
->
set_thread_id
(
host_node
.
ThreadId
());
current_host_trace_event_node_proto_
->
set_allocated_host_trace_event
(
host_trace_event
);
}
void
SerializationLogger
::
LogRuntimeTraceEventNode
(
const
CudaRuntimeTraceEventNode
&
runtime_node
)
{
CudaRuntimeTraceEventProto
*
runtime_trace_event
=
new
CudaRuntimeTraceEventProto
();
runtime_trace_event
->
set_name
(
runtime_node
.
Name
());
runtime_trace_event
->
set_start_ns
(
runtime_node
.
StartNs
());
runtime_trace_event
->
set_end_ns
(
runtime_node
.
EndNs
());
runtime_trace_event
->
set_process_id
(
runtime_node
.
ProcessId
());
runtime_trace_event
->
set_thread_id
(
runtime_node
.
ThreadId
());
runtime_trace_event
->
set_correlation_id
(
runtime_node
.
CorrelationId
());
runtime_trace_event
->
set_callback_id
(
runtime_node
.
CallbackId
());
current_runtime_trace_event_node_proto_
->
set_allocated_runtime_trace_event
(
runtime_trace_event
);
}
void
SerializationLogger
::
LogDeviceTraceEventNode
(
const
DeviceTraceEventNode
&
device_node
)
{
switch
(
device_node
.
Type
())
{
case
TracerEventType
::
Kernel
:
HandleTypeKernel
(
device_node
);
break
;
case
TracerEventType
::
Memcpy
:
HandleTypeMemcpy
(
device_node
);
break
;
case
TracerEventType
::
Memset
:
HandleTypeMemset
(
device_node
);
break
;
default:
break
;
}
}
void
SerializationLogger
::
HandleTypeKernel
(
const
DeviceTraceEventNode
&
device_node
)
{
DeviceTraceEventProto
*
device_trace_event
=
new
DeviceTraceEventProto
();
KernelEventInfoProto
*
kernel_info
=
new
KernelEventInfoProto
();
// fill DeviceTraceEventProto
device_trace_event
->
set_name
(
device_node
.
Name
());
device_trace_event
->
set_type
(
static_cast
<
TracerEventTypeProto
>
(
device_node
.
Type
()));
device_trace_event
->
set_start_ns
(
device_node
.
StartNs
());
device_trace_event
->
set_end_ns
(
device_node
.
EndNs
());
device_trace_event
->
set_device_id
(
device_node
.
DeviceId
());
device_trace_event
->
set_context_id
(
device_node
.
ContextId
());
device_trace_event
->
set_stream_id
(
device_node
.
StreamId
());
device_trace_event
->
set_correlation_id
(
device_node
.
CorrelationId
());
// fill KernelEventInfoProto
KernelEventInfo
info
=
device_node
.
KernelInfo
();
kernel_info
->
set_block_x
(
info
.
block_x
);
kernel_info
->
set_block_y
(
info
.
block_y
);
kernel_info
->
set_block_z
(
info
.
block_z
);
kernel_info
->
set_grid_x
(
info
.
grid_x
);
kernel_info
->
set_grid_y
(
info
.
grid_y
);
kernel_info
->
set_grid_z
(
info
.
grid_z
);
kernel_info
->
set_dynamic_shared_memory
(
info
.
dynamic_shared_memory
);
kernel_info
->
set_static_shared_memory
(
info
.
static_shared_memory
);
kernel_info
->
set_registers_per_thread
(
info
.
registers_per_thread
);
kernel_info
->
set_local_memory_per_thread
(
info
.
local_memory_per_thread
);
kernel_info
->
set_local_memory_total
(
info
.
local_memory_total
);
kernel_info
->
set_queued
(
info
.
queued
);
kernel_info
->
set_submitted
(
info
.
submitted
);
kernel_info
->
set_completed
(
info
.
completed
);
// binding
device_trace_event
->
set_allocated_kernel_info
(
kernel_info
);
current_device_trace_event_node_proto_
->
set_allocated_device_event
(
device_trace_event
);
}
void
SerializationLogger
::
HandleTypeMemcpy
(
const
DeviceTraceEventNode
&
device_node
)
{
DeviceTraceEventProto
*
device_trace_event
=
new
DeviceTraceEventProto
();
MemcpyEventInfoProto
*
memcpy_info
=
new
MemcpyEventInfoProto
();
// fill DeviceTraceEventProto
device_trace_event
->
set_name
(
device_node
.
Name
());
device_trace_event
->
set_type
(
static_cast
<
TracerEventTypeProto
>
(
device_node
.
Type
()));
device_trace_event
->
set_start_ns
(
device_node
.
StartNs
());
device_trace_event
->
set_end_ns
(
device_node
.
EndNs
());
device_trace_event
->
set_device_id
(
device_node
.
DeviceId
());
device_trace_event
->
set_context_id
(
device_node
.
ContextId
());
device_trace_event
->
set_stream_id
(
device_node
.
StreamId
());
device_trace_event
->
set_correlation_id
(
device_node
.
CorrelationId
());
// fill MemcpyEventInfoProto
MemcpyEventInfo
info
=
device_node
.
MemcpyInfo
();
memcpy_info
->
set_num_bytes
(
info
.
num_bytes
);
memcpy_info
->
set_copy_kind
(
std
::
string
(
info
.
copy_kind
));
memcpy_info
->
set_src_kind
(
std
::
string
(
info
.
src_kind
));
memcpy_info
->
set_dst_kind
(
std
::
string
(
info
.
dst_kind
));
// binding
device_trace_event
->
set_allocated_memcpy_info
(
memcpy_info
);
current_device_trace_event_node_proto_
->
set_allocated_device_event
(
device_trace_event
);
}
void
SerializationLogger
::
HandleTypeMemset
(
const
DeviceTraceEventNode
&
device_node
)
{
DeviceTraceEventProto
*
device_trace_event
=
new
DeviceTraceEventProto
();
MemsetEventInfoProto
*
memset_info
=
new
MemsetEventInfoProto
();
// fill DeviceTraceEventProto
device_trace_event
->
set_name
(
device_node
.
Name
());
device_trace_event
->
set_type
(
static_cast
<
TracerEventTypeProto
>
(
device_node
.
Type
()));
device_trace_event
->
set_start_ns
(
device_node
.
StartNs
());
device_trace_event
->
set_end_ns
(
device_node
.
EndNs
());
device_trace_event
->
set_device_id
(
device_node
.
DeviceId
());
device_trace_event
->
set_context_id
(
device_node
.
ContextId
());
device_trace_event
->
set_stream_id
(
device_node
.
StreamId
());
device_trace_event
->
set_correlation_id
(
device_node
.
CorrelationId
());
// fill MemsetEventInfoProto
MemsetEventInfo
info
=
device_node
.
MemsetInfo
();
memset_info
->
set_num_bytes
(
info
.
num_bytes
);
memset_info
->
set_memory_kind
(
std
::
string
(
info
.
memory_kind
));
memset_info
->
set_value
(
info
.
value
);
// binding
device_trace_event
->
set_allocated_memset_info
(
memset_info
);
current_device_trace_event_node_proto_
->
set_allocated_device_event
(
device_trace_event
);
}
SerializationLogger
::
SerializationLogger
(
const
std
::
string
&
filename
)
{
filename_
=
filename
.
empty
()
?
DefaultFileName
()
:
filename
;
OpenFile
();
}
SerializationLogger
::
SerializationLogger
(
const
char
*
filename_cstr
)
{
std
::
string
filename
(
filename_cstr
);
filename_
=
filename
.
empty
()
?
DefaultFileName
()
:
filename
;
OpenFile
();
}
SerializationLogger
::~
SerializationLogger
()
{
if
(
!
output_file_stream_
)
{
delete
node_trees_proto_
;
return
;
}
node_trees_proto_
->
SerializeToOstream
(
&
output_file_stream_
);
delete
node_trees_proto_
;
output_file_stream_
.
close
();
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler/dump/serialization_logger.h
0 → 100755
浏览文件 @
40d2b7c6
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/profiler/dump/nodetree.pb.h"
#include "paddle/fluid/platform/profiler/output_logger.h"
namespace
paddle
{
namespace
platform
{
// Dump a NodeTrees into a profobuf file.
// A SerializationLogger object can only dump a NodeTrees object,
// creates a file in the constructor and closes the file in the destructor.
class
SerializationLogger
:
public
BaseLogger
{
public:
explicit
SerializationLogger
(
const
std
::
string
&
filename
);
explicit
SerializationLogger
(
const
char
*
filename
);
~
SerializationLogger
();
std
::
string
filename
()
{
return
filename_
;
}
void
LogDeviceTraceEventNode
(
const
DeviceTraceEventNode
&
)
override
;
void
LogHostTraceEventNode
(
const
HostTraceEventNode
&
)
override
;
void
LogRuntimeTraceEventNode
(
const
CudaRuntimeTraceEventNode
&
)
override
;
void
LogNodeTrees
(
const
NodeTrees
&
)
override
;
private:
void
OpenFile
();
void
HandleTypeKernel
(
const
DeviceTraceEventNode
&
);
void
HandleTypeMemset
(
const
DeviceTraceEventNode
&
);
void
HandleTypeMemcpy
(
const
DeviceTraceEventNode
&
);
std
::
string
filename_
;
std
::
ofstream
output_file_stream_
;
NodeTreesProto
*
node_trees_proto_
;
ThreadNodeTreeProto
*
current_thread_node_tree_proto_
;
HostTraceEventNodeProto
*
current_host_trace_event_node_proto_
;
CudaRuntimeTraceEventNodeProto
*
current_runtime_trace_event_node_proto_
;
DeviceTraceEventNodeProto
*
current_device_trace_event_node_proto_
;
};
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/profiler/dump/test_serialization_logger.cc
0 → 100644
浏览文件 @
40d2b7c6
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h"
using
paddle
::
platform
::
SerializationLogger
;
using
paddle
::
platform
::
DeserializationReader
;
using
paddle
::
platform
::
NodeTrees
;
using
paddle
::
platform
::
HostTraceEventNode
;
using
paddle
::
platform
::
CudaRuntimeTraceEventNode
;
using
paddle
::
platform
::
DeviceTraceEventNode
;
using
paddle
::
platform
::
HostTraceEvent
;
using
paddle
::
platform
::
RuntimeTraceEvent
;
using
paddle
::
platform
::
DeviceTraceEvent
;
using
paddle
::
platform
::
TracerEventType
;
using
paddle
::
platform
::
KernelEventInfo
;
using
paddle
::
platform
::
MemcpyEventInfo
;
using
paddle
::
platform
::
MemsetEventInfo
;
TEST
(
SerializationLoggerTest
,
dump_case0
)
{
std
::
list
<
HostTraceEvent
>
host_events
;
std
::
list
<
RuntimeTraceEvent
>
runtime_events
;
std
::
list
<
DeviceTraceEvent
>
device_events
;
host_events
.
push_back
(
HostTraceEvent
(
std
::
string
(
"dataloader#1"
),
TracerEventType
::
Dataloader
,
1000
,
10000
,
10
,
10
));
host_events
.
push_back
(
HostTraceEvent
(
std
::
string
(
"op1"
),
TracerEventType
::
Operator
,
11000
,
20000
,
10
,
10
));
host_events
.
push_back
(
HostTraceEvent
(
std
::
string
(
"op2"
),
TracerEventType
::
Operator
,
21000
,
30000
,
10
,
10
));
host_events
.
push_back
(
HostTraceEvent
(
std
::
string
(
"op3"
),
TracerEventType
::
Operator
,
31000
,
40000
,
10
,
11
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch1"
),
15000
,
17000
,
10
,
10
,
1
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch2"
),
25000
,
35000
,
10
,
10
,
2
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch3"
),
33000
,
37000
,
10
,
11
,
3
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudaMemcpy1"
),
18000
,
19000
,
10
,
10
,
4
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudaMemset1"
),
38000
,
39000
,
10
,
11
,
5
,
0
));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel1"
),
TracerEventType
::
Kernel
,
40000
,
55000
,
0
,
10
,
10
,
1
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel2"
),
TracerEventType
::
Kernel
,
70000
,
95000
,
0
,
10
,
10
,
2
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel3"
),
TracerEventType
::
Kernel
,
60000
,
65000
,
0
,
10
,
11
,
3
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"memcpy1"
),
TracerEventType
::
Memcpy
,
56000
,
59000
,
0
,
10
,
10
,
4
,
MemcpyEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"memset1"
),
TracerEventType
::
Memset
,
66000
,
69000
,
0
,
10
,
11
,
5
,
MemsetEventInfo
()));
SerializationLogger
logger
(
"test_serialization_logger_case0.pb"
);
NodeTrees
tree
(
host_events
,
runtime_events
,
device_events
);
std
::
map
<
uint64_t
,
std
::
vector
<
HostTraceEventNode
*>>
nodes
=
tree
.
Traverse
(
true
);
EXPECT_EQ
(
nodes
[
10
].
size
(),
4u
);
EXPECT_EQ
(
nodes
[
11
].
size
(),
2u
);
std
::
vector
<
HostTraceEventNode
*>
thread1_nodes
=
nodes
[
10
];
std
::
vector
<
HostTraceEventNode
*>
thread2_nodes
=
nodes
[
11
];
for
(
auto
it
=
thread1_nodes
.
begin
();
it
!=
thread1_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"root node"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
3u
);
}
if
((
*
it
)
->
Name
()
==
"op1"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
}
}
for
(
auto
it
=
thread2_nodes
.
begin
();
it
!=
thread2_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"op3"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
}
}
tree
.
LogMe
(
&
logger
);
}
TEST
(
SerializationLoggerTest
,
dump_case1
)
{
std
::
list
<
HostTraceEvent
>
host_events
;
std
::
list
<
RuntimeTraceEvent
>
runtime_events
;
std
::
list
<
DeviceTraceEvent
>
device_events
;
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch1"
),
15000
,
17000
,
10
,
10
,
1
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch2"
),
25000
,
35000
,
10
,
10
,
2
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudalaunch3"
),
33000
,
37000
,
10
,
11
,
3
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudaMemcpy1"
),
18000
,
19000
,
10
,
10
,
4
,
0
));
runtime_events
.
push_back
(
RuntimeTraceEvent
(
std
::
string
(
"cudaMemset1"
),
38000
,
39000
,
10
,
11
,
5
,
0
));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel1"
),
TracerEventType
::
Kernel
,
40000
,
55000
,
0
,
10
,
10
,
1
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel2"
),
TracerEventType
::
Kernel
,
70000
,
95000
,
0
,
10
,
10
,
2
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"kernel3"
),
TracerEventType
::
Kernel
,
60000
,
65000
,
0
,
10
,
11
,
3
,
KernelEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"memcpy1"
),
TracerEventType
::
Memcpy
,
56000
,
59000
,
0
,
10
,
10
,
4
,
MemcpyEventInfo
()));
device_events
.
push_back
(
DeviceTraceEvent
(
std
::
string
(
"memset1"
),
TracerEventType
::
Memset
,
66000
,
69000
,
0
,
10
,
11
,
5
,
MemsetEventInfo
()));
SerializationLogger
logger
(
"test_serialization_logger_case1.pb"
);
NodeTrees
tree
(
host_events
,
runtime_events
,
device_events
);
std
::
map
<
uint64_t
,
std
::
vector
<
HostTraceEventNode
*>>
nodes
=
tree
.
Traverse
(
true
);
EXPECT_EQ
(
nodes
[
10
].
size
(),
1u
);
EXPECT_EQ
(
nodes
[
11
].
size
(),
1u
);
std
::
vector
<
HostTraceEventNode
*>
thread1_nodes
=
nodes
[
10
];
std
::
vector
<
HostTraceEventNode
*>
thread2_nodes
=
nodes
[
11
];
for
(
auto
it
=
thread1_nodes
.
begin
();
it
!=
thread1_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"root node"
)
{
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
3u
);
}
}
for
(
auto
it
=
thread2_nodes
.
begin
();
it
!=
thread2_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"root node"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
}
}
tree
.
LogMe
(
&
logger
);
}
TEST
(
DeserializationReaderTest
,
restore_case0
)
{
DeserializationReader
reader
(
"test_serialization_logger_case0.pb"
);
std
::
unique_ptr
<
NodeTrees
>
tree
=
reader
.
Parse
();
std
::
map
<
uint64_t
,
std
::
vector
<
HostTraceEventNode
*>>
nodes
=
tree
->
Traverse
(
true
);
EXPECT_EQ
(
nodes
[
10
].
size
(),
4u
);
EXPECT_EQ
(
nodes
[
11
].
size
(),
2u
);
std
::
vector
<
HostTraceEventNode
*>
thread1_nodes
=
nodes
[
10
];
std
::
vector
<
HostTraceEventNode
*>
thread2_nodes
=
nodes
[
11
];
for
(
auto
it
=
thread1_nodes
.
begin
();
it
!=
thread1_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"root node"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
3u
);
}
if
((
*
it
)
->
Name
()
==
"op1"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
}
}
for
(
auto
it
=
thread2_nodes
.
begin
();
it
!=
thread2_nodes
.
end
();
it
++
)
{
if
((
*
it
)
->
Name
()
==
"op3"
)
{
EXPECT_EQ
((
*
it
)
->
GetChildren
().
size
(),
0u
);
EXPECT_EQ
((
*
it
)
->
GetRuntimeTraceEventNodes
().
size
(),
2u
);
}
}
}
paddle/fluid/platform/profiler/utils.h
0 → 100644
浏览文件 @
40d2b7c6
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/os_info.h"
namespace
paddle
{
namespace
platform
{
template
<
typename
...
Args
>
std
::
string
string_format
(
const
std
::
string
&
format
,
Args
...
args
)
{
int
size_s
=
std
::
snprintf
(
nullptr
,
0
,
format
.
c_str
(),
args
...)
+
1
;
// Extra space for '\0'
PADDLE_ENFORCE_GE
(
size_s
,
0
,
platform
::
errors
::
Fatal
(
"Error during profiler data formatting."
));
auto
size
=
static_cast
<
size_t
>
(
size_s
);
auto
buf
=
std
::
make_unique
<
char
[]
>
(
size
);
std
::
snprintf
(
buf
.
get
(),
size
,
format
.
c_str
(),
args
...);
return
std
::
string
(
buf
.
get
(),
size
-
1
);
// exclude the '\0'
}
static
std
::
string
GetStringFormatLocalTime
()
{
std
::
time_t
rawtime
;
std
::
tm
*
timeinfo
;
char
buf
[
100
];
std
::
time
(
&
rawtime
);
timeinfo
=
std
::
localtime
(
&
rawtime
);
std
::
strftime
(
buf
,
100
,
"%F-%X"
,
timeinfo
);
return
std
::
string
(
buf
);
}
static
int64_t
nsToUs
(
int64_t
ns
)
{
return
ns
/
1000
;
}
}
// namespace platform
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录