未验证 提交 6744754f 编写于 作者: C chenjian 提交者: GitHub

Add time range duration display (#41029)

* no

* fix bugs

* fix doc according to review

* fix api doc format

* fix api doc according to review

* fix bug and add unit test

* fix record event bug

* optimize chrome tracing display

* fix bug

* add comment

* add unit test

* fix a bug

* fix

* fix

* fix format
上级 b9da48da
......@@ -162,8 +162,9 @@ void RecordEvent::OriginalConstruct(const std::string &name,
void RecordEvent::End() {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook && is_pushed_ && is_enabled_) {
if (g_enable_nvprof_hook && is_pushed_) {
dynload::nvtxRangePop();
is_pushed_ = false;
}
#endif
#endif
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <cstdio>
#include <ctime>
#include <limits>
#include "glog/logging.h"
......@@ -75,6 +76,26 @@ void ChromeTracingLogger::LogNodeTrees(const NodeTrees& node_trees) {
// log all nodes except root node, root node is a helper node.
const std::map<uint64_t, std::vector<HostTraceEventNode*>>
thread2host_event_nodes = node_trees.Traverse(true);
// find the earliest time in current timeline
start_time_ = std::numeric_limits<uint64_t>::max();
for (auto it = thread2host_event_nodes.begin();
it != thread2host_event_nodes.end(); ++it) {
if (it->second.begin() + 1 != it->second.end()) {
if ((*(it->second.begin() + 1))->StartNs() < start_time_) {
start_time_ = (*(it->second.begin() + 1))->StartNs();
}
} else {
auto runtimenode =
(*(it->second.begin()))->GetRuntimeTraceEventNodes().begin();
if (runtimenode !=
(*(it->second.begin()))->GetRuntimeTraceEventNodes().end()) {
if ((*runtimenode)->StartNs() < start_time_) {
start_time_ = (*runtimenode)->StartNs();
}
}
}
}
for (auto it = thread2host_event_nodes.begin();
it != thread2host_event_nodes.end(); ++it) {
for (auto hostnode = it->second.begin(); hostnode != it->second.end();
......@@ -102,6 +123,13 @@ void ChromeTracingLogger::LogHostTraceEventNode(
if (!output_file_stream_) {
return;
}
std::string dur_display;
float dur = nsToMsFloat(host_node.Duration());
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
switch (host_node.Type()) {
case TracerEventType::ProfileStep:
case TracerEventType::Forward:
......@@ -110,42 +138,50 @@ void ChromeTracingLogger::LogHostTraceEventNode(
case TracerEventType::Optimization:
case TracerEventType::PythonOp:
case TracerEventType::PythonUserDefined:
// cname value comes from tracing.js reservedColorsByName variable
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": "%lld(Python)",
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": "%lld(Python)",
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "thread_state_runnable",
"args": {
"start_ns": %lld,
"end_ns": %lld
"start_time": "%.3f us",
"end_time": "%.3f us"
}
},
)JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
host_node.ThreadId(), nsToUs(host_node.StartNs()),
nsToUsFloat(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs());
nsToUsFloat(host_node.StartNs(), start_time_),
nsToUsFloat(host_node.EndNs(), start_time_));
break;
default:
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "thread_state_runnable",
"args": {
"start_ns": %lld,
"end_ns": %lld
"start_time": "%.3f us",
"end_time": "%.3f us"
}
},
)JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
host_node.ThreadId(), nsToUs(host_node.StartNs()),
nsToUsFloat(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs());
nsToUsFloat(host_node.StartNs(), start_time_),
nsToUsFloat(host_node.EndNs(), start_time_));
break;
}
......@@ -157,26 +193,35 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
if (!output_file_stream_) {
return;
}
float dur = nsToMsFloat(runtime_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "thread_state_running",
"args": {
"correlation id": %d,
"start_ns": %lld,
"end_ns": %lld
"start_time": "%.3f us",
"end_time": "%.3f us"
}
},
)JSON"),
runtime_node.Name().c_str(), runtime_node.ProcessId(),
runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()),
nsToUs(runtime_node.Duration()),
runtime_node.Name().c_str(), dur_display.c_str(),
runtime_node.ProcessId(), runtime_node.ThreadId(),
nsToUs(runtime_node.StartNs()), nsToUsFloat(runtime_node.Duration()),
categary_name_[static_cast<int>(runtime_node.Type())],
runtime_node.CorrelationId(), runtime_node.StartNs(),
runtime_node.EndNs());
runtime_node.CorrelationId(),
nsToUsFloat(runtime_node.StartNs(), start_time_),
nsToUsFloat(runtime_node.EndNs(), start_time_));
pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()});
output_file_stream_ << string_format(
......@@ -199,6 +244,7 @@ void ChromeTracingLogger::LogDeviceTraceEventNode(
if (!output_file_stream_) {
return;
}
switch (device_node.Type()) {
case TracerEventType::Kernel:
HandleTypeKernel(device_node);
......@@ -265,17 +311,24 @@ void ChromeTracingLogger::HandleTypeKernel(
kernel_info.block_x, kernel_info.block_y, kernel_info.block_z,
blocks_per_sm);
#endif
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": {
"start_ns": %lld,
"end_ns": %lld,
"start_time": "%.3f us",
"end_time": "%.3f us",
"device": %d, "context": %d,
"stream": %d, "correlation id": %d,
"registers per thread": %d,
......@@ -284,15 +337,16 @@ void ChromeTracingLogger::HandleTypeKernel(
"warps per SM": %f,
"grid": [%d, %d, %d],
"block": [%d, %d, %d],
"theoretical achieved occupancy %%": %f
"theoretical achieved occupancy %%": %.3f
}
},
)JSON"),
device_node.Name().c_str(), device_node.DeviceId(),
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()),
nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), kernel_info.registers_per_thread,
kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory,
......@@ -308,53 +362,71 @@ void ChromeTracingLogger::HandleTypeMemcpy(
if (device_node.Duration() > 0) {
memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration();
}
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": {
"start_ns": %lld,
"end_ns": %lld,
"start_time": "%.3f us",
"end_time": "%.3f us",
"stream": %d, "correlation id": %d,
"bytes": %d, "memory bandwidth (GB/s)": %f
"bytes": %d, "memory bandwidth (GB/s)": %.3f
}
},
)JSON"),
device_node.Name().c_str(), device_node.DeviceId(),
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()),
nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.StreamId(),
nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.StreamId(),
device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth);
}
void ChromeTracingLogger::HandleTypeMemset(
const DeviceTraceEventNode& device_node) {
MemsetEventInfo memset_info = device_node.MemsetInfo();
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld,
"name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": {
"start_ns": %lld,
"end_ns": %lld,
"start_time": "%.3f us",
"end_time": "%.3f us",
"device": %d, "context": %d,
"stream": %d, "correlation id": %d,
"bytes": %d, "value": %d
}
},
)JSON"),
device_node.Name().c_str(), device_node.DeviceId(),
device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()),
nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), memset_info.num_bytes, memset_info.value);
}
......
......@@ -50,6 +50,7 @@ class ChromeTracingLogger : public BaseLogger {
static const char* categary_name_[];
std::set<std::pair<uint64_t, uint64_t>> pid_tid_set_;
std::set<std::pair<uint64_t, uint64_t>> deviceid_streamid_set_;
uint64_t start_time_;
};
} // namespace platform
......
......@@ -44,7 +44,16 @@ static std::string GetStringFormatLocalTime() {
return std::string(buf);
}
static int64_t nsToUs(int64_t ns) { return ns / 1000; }
static int64_t nsToUs(uint64_t end_ns, uint64_t start_ns = 0) {
return (end_ns - start_ns) / 1000;
}
static float nsToUsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
return static_cast<float>(end_ns - start_ns) / 1000;
}
static float nsToMsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
return static_cast<float>(end_ns - start_ns) / 1000 / 1000;
}
#ifdef PADDLE_WITH_CUPTI
float CalculateEstOccupancy(uint32_t deviceId, uint16_t registersPerThread,
......
......@@ -128,6 +128,16 @@ class TestProfiler(unittest.TestCase):
result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
class TestNvprof(unittest.TestCase):
def test_nvprof(self):
for i in range(10):
paddle.fluid.profiler._nvprof_range(i, 10, 20)
x_value = np.random.randn(2, 3, 3)
x = paddle.to_tensor(
x_value, stop_gradient=False, place=paddle.CPUPlace())
y = x / 2.0
class RandomDataset(Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
......
......@@ -77,17 +77,19 @@ class RecordEvent(ContextDecorator):
r"""
Record the time of begining.
.. code-block:: python
:name: code-example2
Examples:
import paddle
import paddle.profiler as profiler
record_event = profiler.RecordEvent("record_sub")
record_event.begin()
data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 - data2
record_event.end()
.. code-block:: python
:name: code-example2
import paddle
import paddle.profiler as profiler
record_event = profiler.RecordEvent("record_sub")
record_event.begin()
data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 - data2
record_event.end()
"""
if self.event_type not in _AllowedEventTypeList:
warn("Only TracerEvent Type in [{}, {}, {}, {}, {}, {},{}]\
......@@ -102,17 +104,19 @@ class RecordEvent(ContextDecorator):
r'''
Record the time of ending.
.. code-block:: python
:name: code-example3
Examples:
import paddle
import paddle.profiler as profiler
record_event = profiler.RecordEvent("record_mul")
record_event.begin()
data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 * data2
record_event.end()
.. code-block:: python
:name: code-example3
import paddle
import paddle.profiler as profiler
record_event = profiler.RecordEvent("record_mul")
record_event.begin()
data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 * data2
record_event.end()
'''
if self.event:
self.event.end()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册