未验证 提交 6744754f 编写于 作者: C chenjian 提交者: GitHub

Add time range duration display (#41029)

* no

* fix bugs

* fix doc according to review

* fix api doc format

* fix api doc according to review

* fix bug and add unit test

* fix record event bug

* optimize chrome tracing display

* fix bug

* add comment

* add unit test

* fix a bug

* fix

* fix

* fix format
上级 b9da48da
...@@ -162,8 +162,9 @@ void RecordEvent::OriginalConstruct(const std::string &name, ...@@ -162,8 +162,9 @@ void RecordEvent::OriginalConstruct(const std::string &name,
void RecordEvent::End() { void RecordEvent::End() {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook && is_pushed_ && is_enabled_) { if (g_enable_nvprof_hook && is_pushed_) {
dynload::nvtxRangePop(); dynload::nvtxRangePop();
is_pushed_ = false;
} }
#endif #endif
#endif #endif
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <cstdio> #include <cstdio>
#include <ctime> #include <ctime>
#include <limits>
#include "glog/logging.h" #include "glog/logging.h"
...@@ -75,6 +76,26 @@ void ChromeTracingLogger::LogNodeTrees(const NodeTrees& node_trees) { ...@@ -75,6 +76,26 @@ void ChromeTracingLogger::LogNodeTrees(const NodeTrees& node_trees) {
// log all nodes except root node, root node is a helper node. // log all nodes except root node, root node is a helper node.
const std::map<uint64_t, std::vector<HostTraceEventNode*>> const std::map<uint64_t, std::vector<HostTraceEventNode*>>
thread2host_event_nodes = node_trees.Traverse(true); thread2host_event_nodes = node_trees.Traverse(true);
// find the earliest time in current timeline
start_time_ = std::numeric_limits<uint64_t>::max();
for (auto it = thread2host_event_nodes.begin();
it != thread2host_event_nodes.end(); ++it) {
if (it->second.begin() + 1 != it->second.end()) {
if ((*(it->second.begin() + 1))->StartNs() < start_time_) {
start_time_ = (*(it->second.begin() + 1))->StartNs();
}
} else {
auto runtimenode =
(*(it->second.begin()))->GetRuntimeTraceEventNodes().begin();
if (runtimenode !=
(*(it->second.begin()))->GetRuntimeTraceEventNodes().end()) {
if ((*runtimenode)->StartNs() < start_time_) {
start_time_ = (*runtimenode)->StartNs();
}
}
}
}
for (auto it = thread2host_event_nodes.begin(); for (auto it = thread2host_event_nodes.begin();
it != thread2host_event_nodes.end(); ++it) { it != thread2host_event_nodes.end(); ++it) {
for (auto hostnode = it->second.begin(); hostnode != it->second.end(); for (auto hostnode = it->second.begin(); hostnode != it->second.end();
...@@ -102,6 +123,13 @@ void ChromeTracingLogger::LogHostTraceEventNode( ...@@ -102,6 +123,13 @@ void ChromeTracingLogger::LogHostTraceEventNode(
if (!output_file_stream_) { if (!output_file_stream_) {
return; return;
} }
std::string dur_display;
float dur = nsToMsFloat(host_node.Duration());
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
switch (host_node.Type()) { switch (host_node.Type()) {
case TracerEventType::ProfileStep: case TracerEventType::ProfileStep:
case TracerEventType::Forward: case TracerEventType::Forward:
...@@ -110,42 +138,50 @@ void ChromeTracingLogger::LogHostTraceEventNode( ...@@ -110,42 +138,50 @@ void ChromeTracingLogger::LogHostTraceEventNode(
case TracerEventType::Optimization: case TracerEventType::Optimization:
case TracerEventType::PythonOp: case TracerEventType::PythonOp:
case TracerEventType::PythonUserDefined: case TracerEventType::PythonUserDefined:
// cname value comes from tracing.js reservedColorsByName variable
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": "%lld(Python)", "name": "%s[%s]", "pid": %lld, "tid": "%lld(Python)",
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "thread_state_runnable",
"args": { "args": {
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld "end_time": "%.3f us"
} }
}, },
)JSON"), )JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), host_node.ThreadId(), nsToUs(host_node.StartNs()),
nsToUsFloat(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())], categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs()); nsToUsFloat(host_node.StartNs(), start_time_),
nsToUsFloat(host_node.EndNs(), start_time_));
break; break;
default: default:
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": "%lld(C++)", "name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "thread_state_runnable",
"args": { "args": {
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld "end_time": "%.3f us"
} }
}, },
)JSON"), )JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), host_node.Name().c_str(), dur_display.c_str(), host_node.ProcessId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), host_node.ThreadId(), nsToUs(host_node.StartNs()),
nsToUsFloat(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())], categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs()); nsToUsFloat(host_node.StartNs(), start_time_),
nsToUsFloat(host_node.EndNs(), start_time_));
break; break;
} }
...@@ -157,26 +193,35 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode( ...@@ -157,26 +193,35 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
if (!output_file_stream_) { if (!output_file_stream_) {
return; return;
} }
float dur = nsToMsFloat(runtime_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": "%lld(C++)", "name": "%s[%s]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "thread_state_running",
"args": { "args": {
"correlation id": %d, "correlation id": %d,
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld "end_time": "%.3f us"
} }
}, },
)JSON"), )JSON"),
runtime_node.Name().c_str(), runtime_node.ProcessId(), runtime_node.Name().c_str(), dur_display.c_str(),
runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()), runtime_node.ProcessId(), runtime_node.ThreadId(),
nsToUs(runtime_node.Duration()), nsToUs(runtime_node.StartNs()), nsToUsFloat(runtime_node.Duration()),
categary_name_[static_cast<int>(runtime_node.Type())], categary_name_[static_cast<int>(runtime_node.Type())],
runtime_node.CorrelationId(), runtime_node.StartNs(), runtime_node.CorrelationId(),
runtime_node.EndNs()); nsToUsFloat(runtime_node.StartNs(), start_time_),
nsToUsFloat(runtime_node.EndNs(), start_time_));
pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()}); pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()});
output_file_stream_ << string_format( output_file_stream_ << string_format(
...@@ -199,6 +244,7 @@ void ChromeTracingLogger::LogDeviceTraceEventNode( ...@@ -199,6 +244,7 @@ void ChromeTracingLogger::LogDeviceTraceEventNode(
if (!output_file_stream_) { if (!output_file_stream_) {
return; return;
} }
switch (device_node.Type()) { switch (device_node.Type()) {
case TracerEventType::Kernel: case TracerEventType::Kernel:
HandleTypeKernel(device_node); HandleTypeKernel(device_node);
...@@ -265,17 +311,24 @@ void ChromeTracingLogger::HandleTypeKernel( ...@@ -265,17 +311,24 @@ void ChromeTracingLogger::HandleTypeKernel(
kernel_info.block_x, kernel_info.block_y, kernel_info.block_z, kernel_info.block_x, kernel_info.block_y, kernel_info.block_z,
blocks_per_sm); blocks_per_sm);
#endif #endif
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": %lld, "name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": { "args": {
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld, "end_time": "%.3f us",
"device": %d, "context": %d, "device": %d, "context": %d,
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"registers per thread": %d, "registers per thread": %d,
...@@ -284,15 +337,16 @@ void ChromeTracingLogger::HandleTypeKernel( ...@@ -284,15 +337,16 @@ void ChromeTracingLogger::HandleTypeKernel(
"warps per SM": %f, "warps per SM": %f,
"grid": [%d, %d, %d], "grid": [%d, %d, %d],
"block": [%d, %d, %d], "block": [%d, %d, %d],
"theoretical achieved occupancy %%": %f "theoretical achieved occupancy %%": %.3f
} }
}, },
)JSON"), )JSON"),
device_node.Name().c_str(), device_node.DeviceId(), device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(), nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(), device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), kernel_info.registers_per_thread, device_node.CorrelationId(), kernel_info.registers_per_thread,
kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory, kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory,
...@@ -308,53 +362,71 @@ void ChromeTracingLogger::HandleTypeMemcpy( ...@@ -308,53 +362,71 @@ void ChromeTracingLogger::HandleTypeMemcpy(
if (device_node.Duration() > 0) { if (device_node.Duration() > 0) {
memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration(); memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration();
} }
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": %lld, "name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": { "args": {
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld, "end_time": "%.3f us",
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"bytes": %d, "memory bandwidth (GB/s)": %f "bytes": %d, "memory bandwidth (GB/s)": %.3f
} }
}, },
)JSON"), )JSON"),
device_node.Name().c_str(), device_node.DeviceId(), device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.StreamId(), nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.StreamId(),
device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth); device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth);
} }
void ChromeTracingLogger::HandleTypeMemset( void ChromeTracingLogger::HandleTypeMemset(
const DeviceTraceEventNode& device_node) { const DeviceTraceEventNode& device_node) {
MemsetEventInfo memset_info = device_node.MemsetInfo(); MemsetEventInfo memset_info = device_node.MemsetInfo();
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
if (dur > 1.0) {
dur_display = string_format(std::string("%.3f ms"), dur);
} else {
dur_display = string_format(std::string("%.3f us"), dur * 1000);
}
output_file_stream_ << string_format( output_file_stream_ << string_format(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": %lld, "name": "%s[%s]", "pid": %lld, "tid": %lld,
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %.3f,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"cname": "rail_animation",
"args": { "args": {
"start_ns": %lld, "start_time": "%.3f us",
"end_ns": %lld, "end_time": "%.3f us",
"device": %d, "context": %d, "device": %d, "context": %d,
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"bytes": %d, "value": %d "bytes": %d, "value": %d
} }
}, },
)JSON"), )JSON"),
device_node.Name().c_str(), device_node.DeviceId(), device_node.Name().c_str(), dur_display.c_str(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUsFloat(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(), nsToUsFloat(device_node.StartNs(), start_time_),
nsToUsFloat(device_node.EndNs(), start_time_), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(), device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), memset_info.num_bytes, memset_info.value); device_node.CorrelationId(), memset_info.num_bytes, memset_info.value);
} }
......
...@@ -50,6 +50,7 @@ class ChromeTracingLogger : public BaseLogger { ...@@ -50,6 +50,7 @@ class ChromeTracingLogger : public BaseLogger {
static const char* categary_name_[]; static const char* categary_name_[];
std::set<std::pair<uint64_t, uint64_t>> pid_tid_set_; std::set<std::pair<uint64_t, uint64_t>> pid_tid_set_;
std::set<std::pair<uint64_t, uint64_t>> deviceid_streamid_set_; std::set<std::pair<uint64_t, uint64_t>> deviceid_streamid_set_;
uint64_t start_time_;
}; };
} // namespace platform } // namespace platform
......
...@@ -44,7 +44,16 @@ static std::string GetStringFormatLocalTime() { ...@@ -44,7 +44,16 @@ static std::string GetStringFormatLocalTime() {
return std::string(buf); return std::string(buf);
} }
static int64_t nsToUs(int64_t ns) { return ns / 1000; } static int64_t nsToUs(uint64_t end_ns, uint64_t start_ns = 0) {
return (end_ns - start_ns) / 1000;
}
// Converts the interval (end_ns - start_ns), given in nanoseconds, to
// microseconds. With the default start_ns = 0 this is a plain ns -> us
// conversion of end_ns.
//
// The difference is formed with an explicit sign and in double precision:
// a bare unsigned subtraction wraps around to a huge positive value whenever
// end_ns < start_ns, and casting to float before dividing rounds twice.
static float nsToUsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
  const double delta_ns = end_ns >= start_ns
                              ? static_cast<double>(end_ns - start_ns)
                              : -static_cast<double>(start_ns - end_ns);
  return static_cast<float>(delta_ns / 1000.0);
}
// Converts the interval (end_ns - start_ns), given in nanoseconds, to
// milliseconds. With the default start_ns = 0 this is a plain ns -> ms
// conversion of end_ns.
//
// The difference is formed with an explicit sign and in double precision:
// a bare unsigned subtraction wraps around to a huge positive value whenever
// end_ns < start_ns, and casting to float before dividing rounds twice.
static float nsToMsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
  const double delta_ns = end_ns >= start_ns
                              ? static_cast<double>(end_ns - start_ns)
                              : -static_cast<double>(start_ns - end_ns);
  return static_cast<float>(delta_ns / 1000000.0);
}
#ifdef PADDLE_WITH_CUPTI #ifdef PADDLE_WITH_CUPTI
float CalculateEstOccupancy(uint32_t deviceId, uint16_t registersPerThread, float CalculateEstOccupancy(uint32_t deviceId, uint16_t registersPerThread,
......
...@@ -128,6 +128,16 @@ class TestProfiler(unittest.TestCase): ...@@ -128,6 +128,16 @@ class TestProfiler(unittest.TestCase):
result = profiler.utils.load_profiler_result('./test_profiler_pb.pb') result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
class TestNvprof(unittest.TestCase):
def test_nvprof(self):
for i in range(10):
paddle.fluid.profiler._nvprof_range(i, 10, 20)
x_value = np.random.randn(2, 3, 3)
x = paddle.to_tensor(
x_value, stop_gradient=False, place=paddle.CPUPlace())
y = x / 2.0
class RandomDataset(Dataset): class RandomDataset(Dataset):
def __init__(self, num_samples): def __init__(self, num_samples):
self.num_samples = num_samples self.num_samples = num_samples
......
...@@ -77,17 +77,19 @@ class RecordEvent(ContextDecorator): ...@@ -77,17 +77,19 @@ class RecordEvent(ContextDecorator):
r""" r"""
Record the time of beginning. Record the time of beginning.
.. code-block:: python Examples:
:name: code-example2
import paddle .. code-block:: python
import paddle.profiler as profiler :name: code-example2
record_event = profiler.RecordEvent("record_sub")
record_event.begin() import paddle
data1 = paddle.randn(shape=[3]) import paddle.profiler as profiler
data2 = paddle.randn(shape=[3]) record_event = profiler.RecordEvent("record_sub")
result = data1 - data2 record_event.begin()
record_event.end() data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 - data2
record_event.end()
""" """
if self.event_type not in _AllowedEventTypeList: if self.event_type not in _AllowedEventTypeList:
warn("Only TracerEvent Type in [{}, {}, {}, {}, {}, {},{}]\ warn("Only TracerEvent Type in [{}, {}, {}, {}, {}, {},{}]\
...@@ -102,17 +104,19 @@ class RecordEvent(ContextDecorator): ...@@ -102,17 +104,19 @@ class RecordEvent(ContextDecorator):
r''' r'''
Record the time of ending. Record the time of ending.
.. code-block:: python Examples:
:name: code-example3
import paddle .. code-block:: python
import paddle.profiler as profiler :name: code-example3
record_event = profiler.RecordEvent("record_mul")
record_event.begin() import paddle
data1 = paddle.randn(shape=[3]) import paddle.profiler as profiler
data2 = paddle.randn(shape=[3]) record_event = profiler.RecordEvent("record_mul")
result = data1 * data2 record_event.begin()
record_event.end() data1 = paddle.randn(shape=[3])
data2 = paddle.randn(shape=[3])
result = data1 * data2
record_event.end()
''' '''
if self.event: if self.event:
self.event.end() self.event.end()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册