未验证 提交 0c3f7fbc 编写于 作者: C chenjian 提交者: GitHub

Upgrade new profiler (#39984)

* add new profiler components

* fix bug

* upgrade new profiler

* fix operator.cc

* fix operator.cc

* fix cmakelists.txt

* fix bug

* fix according to pr

* fix bug

* fix cmake

* fix bug

* fix a bug

* fix bug

* fix bug
上级 bc113e10
...@@ -264,10 +264,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -264,10 +264,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
// and different op name cost time,we set two event. // and different op name cost time,we set two event.
platform::RecordEvent op_type_record_event( platform::RecordEvent op_type_record_event(
Type(), platform::TracerEventType::Operator, 1); Type(), platform::TracerEventType::Operator, 1);
// auto op_name = platform::OpName(outputs_, Type()); auto op_name = platform::OpName(outputs_, Type());
// platform::RecordEvent op_name_record_event( platform::RecordEvent op_name_record_event(
// op_name, platform::TracerEventType::Operator, 1, op_name, platform::TracerEventType::Operator, 10,
// platform::EventRole::kUniqueOp); platform::EventRole::kUniqueOp);
RunImpl(scope, place); RunImpl(scope, place);
} }
......
...@@ -2,10 +2,12 @@ cc_library(host_tracer SRCS host_tracer.cc DEPS enforce) ...@@ -2,10 +2,12 @@ cc_library(host_tracer SRCS host_tracer.cc DEPS enforce)
cc_library(cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc DEPS workqueue_utils enforce glog) cc_library(cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc DEPS workqueue_utils enforce glog)
cc_library(event_node SRCS event_node.cc DEPS enforce) cc_library(event_node SRCS event_node.cc DEPS enforce)
cc_library(profiler_utils SRCS utils.cc DEPS enforce glog) cc_library(profiler_utils SRCS utils.cc DEPS enforce glog)
cc_library(chrometracinglogger SRCS chrometracing_logger.cc DEPS event_node profiler_utils)
cc_test(test_event_node SRCS test_event_node.cc DEPS event_node chrometracinglogger)
add_subdirectory(dump) add_subdirectory(dump)
cc_library(profiler_logger SRCS chrometracing_logger.cc dump/serialization_logger.cc dump/deserialization_reader.cc DEPS nodetreeproto event_node profiler_utils)
cc_library(event_bind SRCS event_python.cc DEPS profiler_logger)
cc_library(cpu_utilization SRCS cpu_utilization.cc DEPS cpu_info os_info enforce glog) cc_library(cpu_utilization SRCS cpu_utilization.cc DEPS cpu_info os_info enforce glog)
cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer profiler_utils cpu_utilization event_bind)
cc_test(test_event_node SRCS test_event_node.cc DEPS event_node profiler_logger)
cc_test(test_extra_info SRCS test_extra_info.cc DEPS profiler_utils) cc_test(test_extra_info SRCS test_extra_info.cc DEPS profiler_utils)
cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer profiler_utils cpu_utilization) cc_test(test_serialization_logger SRCS dump/test_serialization_logger.cc DEPS event_bind)
cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler event_node) cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler)
...@@ -18,40 +18,17 @@ limitations under the License. */ ...@@ -18,40 +18,17 @@ limitations under the License. */
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/os_info.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/chrometracing_logger.h" #include "paddle/fluid/platform/profiler/chrometracing_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/utils.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
static const char* kSchemaVersion = "1.0.0"; static const char* kSchemaVersion = "1.0.0";
static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.json"; static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.json";
static uint32_t num_span = 0; static uint32_t span_indx = 0;
static int64_t nsToUs(int64_t ns) { return ns / 1000; }
template <typename... Args>
std::string string_format(const std::string& format, Args... args) {
int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) +
1; // Extra space for '\0'
PADDLE_ENFORCE_GE(size_s, 0, platform::errors::Fatal(
"Error during profiler data formatting."));
auto size = static_cast<size_t>(size_s);
auto buf = std::make_unique<char[]>(size);
std::snprintf(buf.get(), size, format.c_str(), args...);
return std::string(buf.get(), size - 1); // exclude the '\0'
}
std::string GetStringFormatLocalTime() {
std::time_t rawtime;
std::tm* timeinfo;
char buf[100];
std::time(&rawtime);
timeinfo = std::localtime(&rawtime);
std::strftime(buf, 100, "%F-%X", timeinfo);
return std::string(buf);
}
static std::string DefaultFileName() { static std::string DefaultFileName() {
auto pid = GetProcessId(); auto pid = GetProcessId();
...@@ -60,16 +37,19 @@ static std::string DefaultFileName() { ...@@ -60,16 +37,19 @@ static std::string DefaultFileName() {
} }
const char* ChromeTracingLogger::categary_name_[] = { const char* ChromeTracingLogger::categary_name_[] = {
"operator", "dataloader", "profile_step", "cuda_runtime", "kernel", "Operator", "Dataloader", "ProfileStep", "CudaRuntime",
"memcpy", "memset", "user_defined", "others"}; "Kernel", "Memcpy", "Memset", "UserDefined",
"OperatorInner", "Forward", "Backward", "Optimization",
"Communication", "PythonOp", "PythonUserDefined"};
void ChromeTracingLogger::OpenFile() { void ChromeTracingLogger::OpenFile() {
output_file_stream_.open(filename_, output_file_stream_.open(filename_,
std::ofstream::out | std::ofstream::trunc); std::ofstream::out | std::ofstream::trunc);
if (!output_file_stream_) { if (!output_file_stream_) {
VLOG(2) << "Unable to open file for writing profiling data." << std::endl; LOG(WARNING) << "Unable to open file for writing profiling data."
<< std::endl;
} else { } else {
VLOG(0) << "writing profiling data to " << filename_ << std::endl; LOG(INFO) << "writing profiling data to " << filename_ << std::endl;
} }
} }
...@@ -122,21 +102,54 @@ void ChromeTracingLogger::LogHostTraceEventNode( ...@@ -122,21 +102,54 @@ void ChromeTracingLogger::LogHostTraceEventNode(
if (!output_file_stream_) { if (!output_file_stream_) {
return; return;
} }
output_file_stream_ << string_format( switch (host_node.Type()) {
std::string( case TracerEventType::ProfileStep:
R"JSON( case TracerEventType::Forward:
case TracerEventType::Backward:
case TracerEventType::Dataloader:
case TracerEventType::Optimization:
case TracerEventType::PythonOp:
case TracerEventType::PythonUserDefined:
output_file_stream_ << string_format(
std::string(
R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": %lld, "name": "%s", "pid": %lld, "tid": "%lld(Python)",
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"args": { "args": {
"start_ns": %lld,
"end_ns": %lld
} }
}, },
)JSON"), )JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())]); categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs());
break;
default:
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "%s", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s",
"args": {
"start_ns": %lld,
"end_ns": %lld
}
},
)JSON"),
host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(),
nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()),
categary_name_[static_cast<int>(host_node.Type())],
host_node.StartNs(), host_node.EndNs());
break;
}
pid_tid_set_.insert({host_node.ProcessId(), host_node.ThreadId()});
} }
void ChromeTracingLogger::LogRuntimeTraceEventNode( void ChromeTracingLogger::LogRuntimeTraceEventNode(
...@@ -148,11 +161,13 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode( ...@@ -148,11 +161,13 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "%s", "pid": %lld, "tid": %lld, "name": "%s", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"args": { "args": {
"correlation id": %d "correlation id": %d,
"start_ns": %lld,
"end_ns": %lld
} }
}, },
)JSON"), )JSON"),
...@@ -160,7 +175,23 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode( ...@@ -160,7 +175,23 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode(
runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()), runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()),
nsToUs(runtime_node.Duration()), nsToUs(runtime_node.Duration()),
categary_name_[static_cast<int>(runtime_node.Type())], categary_name_[static_cast<int>(runtime_node.Type())],
runtime_node.CorrelationId()); runtime_node.CorrelationId(), runtime_node.StartNs(),
runtime_node.EndNs());
pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()});
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "launch", "id": %d, "pid": %lld, "tid": "%lld(C++)",
"ts": %lld,
"ph": "s", "cat": "async"
},
)JSON"),
runtime_node.CorrelationId(), runtime_node.ProcessId(),
runtime_node.ThreadId(),
nsToUs((runtime_node.StartNs() + runtime_node.EndNs()) >> 1));
pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()});
} }
void ChromeTracingLogger::LogDeviceTraceEventNode( void ChromeTracingLogger::LogDeviceTraceEventNode(
...@@ -180,6 +211,36 @@ void ChromeTracingLogger::LogDeviceTraceEventNode( ...@@ -180,6 +211,36 @@ void ChromeTracingLogger::LogDeviceTraceEventNode(
default: default:
break; break;
} }
if (nsToUs(device_node.Duration()) == 0) {
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "launch", "id": %d, "pid": %lld, "tid": %lld,
"ts": %lld,
"ph": "f", "cat": "async"
},
)JSON"),
device_node.CorrelationId(), device_node.DeviceId(),
device_node.StreamId(), nsToUs(device_node.StartNs()));
deviceid_streamid_set_.insert(
{device_node.DeviceId(), device_node.StreamId()});
} else {
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "launch", "id": %d, "pid": %lld, "tid": %lld,
"ts": %lld,
"ph": "f", "cat": "async", "bp": "e"
},
)JSON"),
device_node.CorrelationId(), device_node.DeviceId(),
device_node.StreamId(),
nsToUs((device_node.StartNs() + device_node.EndNs()) >> 1));
deviceid_streamid_set_.insert(
{device_node.DeviceId(), device_node.StreamId()});
}
} }
void ChromeTracingLogger::HandleTypeKernel( void ChromeTracingLogger::HandleTypeKernel(
...@@ -188,16 +249,21 @@ void ChromeTracingLogger::HandleTypeKernel( ...@@ -188,16 +249,21 @@ void ChromeTracingLogger::HandleTypeKernel(
float blocks_per_sm = 0.0; float blocks_per_sm = 0.0;
float warps_per_sm = 0.0; float warps_per_sm = 0.0;
float occupancy = 0.0; float occupancy = 0.0;
#if defined(PADDLE_WITH_CUDA) #if defined(PADDLE_WITH_CUPTI)
constexpr int threads_per_warp = 32; constexpr int threads_per_warp = 32;
const gpuDeviceProp& device_property = const gpuDeviceProp& device_property =
GetDeviceProperties(device_node.DeviceId()); GetDeviceProperties(device_node.DeviceId());
blocks_per_sm = blocks_per_sm = static_cast<float>(kernel_info.grid_x * kernel_info.grid_y *
(kernel_info.grid_x * kernel_info.grid_y * kernel_info.grid_z) / kernel_info.grid_z) /
device_property.multiProcessorCount; device_property.multiProcessorCount;
warps_per_sm = blocks_per_sm * (kernel_info.block_x * kernel_info.block_y * warps_per_sm = blocks_per_sm * (kernel_info.block_x * kernel_info.block_y *
kernel_info.block_z) / kernel_info.block_z) /
threads_per_warp; threads_per_warp;
occupancy = CalculateEstOccupancy(
device_node.DeviceId(), kernel_info.registers_per_thread,
kernel_info.static_shared_memory, kernel_info.dynamic_shared_memory,
kernel_info.block_x, kernel_info.block_y, kernel_info.block_z,
blocks_per_sm);
#endif #endif
output_file_stream_ << string_format( output_file_stream_ << string_format(
...@@ -208,15 +274,17 @@ void ChromeTracingLogger::HandleTypeKernel( ...@@ -208,15 +274,17 @@ void ChromeTracingLogger::HandleTypeKernel(
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"args": { "args": {
"start_ns": %lld,
"end_ns": %lld,
"device": %d, "context": %d, "device": %d, "context": %d,
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"registers per thread": %d, "registers per thread": %d,
"shared memory": %f, "shared memory": %d,
"blocks per SM": %f, "blocks per SM": %f,
"warps per SM": %f, "warps per SM": %f,
"grid": [%d, %d, %d], "grid": [%d, %d, %d],
"block": [%d, %d, %d], "block": [%d, %d, %d],
"est. achieved occupancy %": %f "theoretical achieved occupancy %%": %f
} }
}, },
)JSON"), )JSON"),
...@@ -224,12 +292,13 @@ void ChromeTracingLogger::HandleTypeKernel( ...@@ -224,12 +292,13 @@ void ChromeTracingLogger::HandleTypeKernel(
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUs(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.DeviceId(), device_node.ContextId(), device_node.StreamId(), device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), kernel_info.registers_per_thread, device_node.CorrelationId(), kernel_info.registers_per_thread,
kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory, kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory,
blocks_per_sm, warps_per_sm, kernel_info.grid_x, kernel_info.grid_y, blocks_per_sm, warps_per_sm, kernel_info.grid_x, kernel_info.grid_y,
kernel_info.grid_z, kernel_info.block_x, kernel_info.block_y, kernel_info.grid_z, kernel_info.block_x, kernel_info.block_y,
kernel_info.block_z, occupancy); kernel_info.block_z, occupancy * 100);
} }
void ChromeTracingLogger::HandleTypeMemcpy( void ChromeTracingLogger::HandleTypeMemcpy(
...@@ -247,6 +316,8 @@ void ChromeTracingLogger::HandleTypeMemcpy( ...@@ -247,6 +316,8 @@ void ChromeTracingLogger::HandleTypeMemcpy(
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"args": { "args": {
"start_ns": %lld,
"end_ns": %lld,
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"bytes": %d, "memory bandwidth (GB/s)": %f "bytes": %d, "memory bandwidth (GB/s)": %f
} }
...@@ -256,8 +327,8 @@ void ChromeTracingLogger::HandleTypeMemcpy( ...@@ -256,8 +327,8 @@ void ChromeTracingLogger::HandleTypeMemcpy(
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUs(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.StreamId(), device_node.CorrelationId(), device_node.StartNs(), device_node.EndNs(), device_node.StreamId(),
memcpy_info.num_bytes, memory_bandwidth); device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth);
} }
void ChromeTracingLogger::HandleTypeMemset( void ChromeTracingLogger::HandleTypeMemset(
...@@ -271,6 +342,8 @@ void ChromeTracingLogger::HandleTypeMemset( ...@@ -271,6 +342,8 @@ void ChromeTracingLogger::HandleTypeMemset(
"ts": %lld, "dur": %lld, "ts": %lld, "dur": %lld,
"ph": "X", "cat": "%s", "ph": "X", "cat": "%s",
"args": { "args": {
"start_ns": %lld,
"end_ns": %lld,
"device": %d, "context": %d, "device": %d, "context": %d,
"stream": %d, "correlation id": %d, "stream": %d, "correlation id": %d,
"bytes": %d, "value": %d "bytes": %d, "value": %d
...@@ -281,7 +354,8 @@ void ChromeTracingLogger::HandleTypeMemset( ...@@ -281,7 +354,8 @@ void ChromeTracingLogger::HandleTypeMemset(
device_node.StreamId(), nsToUs(device_node.StartNs()), device_node.StreamId(), nsToUs(device_node.StartNs()),
nsToUs(device_node.Duration()), nsToUs(device_node.Duration()),
categary_name_[static_cast<int>(device_node.Type())], categary_name_[static_cast<int>(device_node.Type())],
device_node.DeviceId(), device_node.ContextId(), device_node.StreamId(), device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(),
device_node.ContextId(), device_node.StreamId(),
device_node.CorrelationId(), memset_info.num_bytes, memset_info.value); device_node.CorrelationId(), memset_info.num_bytes, memset_info.value);
} }
...@@ -290,10 +364,10 @@ void ChromeTracingLogger::StartLog() { ...@@ -290,10 +364,10 @@ void ChromeTracingLogger::StartLog() {
R"JSON( R"JSON(
{ {
"schemaVersion": "%s", "schemaVersion": "%s",
"displayTimeUnit": "us", "displayTimeUnit": "ms",
"SpanNumber": "%d", "span_indx": "%d",
)JSON"), )JSON"),
kSchemaVersion, num_span); kSchemaVersion, span_indx++);
// add device property information // add device property information
#if defined(PADDLE_WITH_CUDA) #if defined(PADDLE_WITH_CUDA)
output_file_stream_ << std::string(R"JSON( output_file_stream_ << std::string(R"JSON(
...@@ -358,11 +432,143 @@ void ChromeTracingLogger::StartLog() { ...@@ -358,11 +432,143 @@ void ChromeTracingLogger::StartLog() {
)JSON"); )JSON");
} }
void ChromeTracingLogger::EndLog() { void ChromeTracingLogger::LogMetaInfo(
const std::unordered_map<std::string, std::string> extra_info) {
RefineDisplayName(extra_info);
output_file_stream_ << std::string( output_file_stream_ << std::string(
R"JSON( R"JSON(
{} {}
] ],
)JSON");
output_file_stream_ << std::string(R"JSON(
"ExtraInfo": {)JSON");
size_t count = extra_info.size();
for (const auto& kv : extra_info) {
if (count > 1) {
output_file_stream_ << string_format(std::string(R"JSON(
"%s": "%s",
)JSON"),
kv.first.c_str(), kv.second.c_str());
} else {
output_file_stream_ << string_format(std::string(R"JSON(
"%s": "%s"
)JSON"),
kv.first.c_str(), kv.second.c_str());
}
count--;
}
output_file_stream_ << std::string(R"JSON(
})JSON");
}
void ChromeTracingLogger::RefineDisplayName(
std::unordered_map<std::string, std::string> extra_info) {
for (auto it = pid_tid_set_.begin(); it != pid_tid_set_.end(); ++it) {
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "process_name", "pid": %lld, "tid": "%lld(Python)",
"ph": "M",
"args": {
"name": "Process %lld (CPU)"
}
},
{
"name": "process_name", "pid": %lld, "tid": "%lld(C++)",
"ph": "M",
"args": {
"name": "Process %lld (CPU)"
}
},
{
"name": "thread_name", "pid": %lld, "tid": "%lld(Python)",
"ph": "M",
"args": {
"name": "thread %lld:%s(Python)"
}
},
{
"name": "thread_name", "pid": %lld, "tid": "%lld(C++)",
"ph": "M",
"args": {
"name": "thread %lld:%s(C++)"
}
},
{
"name": "process_sort_index", "pid": %lld, "tid": %lld,
"ph": "M",
"args": {
"sort_index": %lld
}
},
{
"name": "thread_sort_index", "pid": %lld, "tid": "%lld(Python)",
"ph": "M",
"args": {
"sort_index": %lld
}
},
{
"name": "thread_sort_index", "pid": %lld, "tid": "%lld(C++)",
"ph": "M",
"args": {
"sort_index": %lld
}
},
)JSON"),
(*it).first, (*it).second, (*it).first, (*it).first, (*it).second,
(*it).first, (*it).first, (*it).second, (*it).second,
extra_info[string_format(std::string("%lld"), (*it).second)].c_str(),
(*it).first, (*it).second, (*it).second,
extra_info[string_format(std::string("%lld"), (*it).second)].c_str(),
(*it).first, (*it).second, (*it).first, (*it).first, (*it).second,
(*it).second * 2, (*it).first, (*it).second, (*it).second * 2 + 1);
}
for (auto it = deviceid_streamid_set_.begin();
it != deviceid_streamid_set_.end(); ++it) {
output_file_stream_ << string_format(
std::string(
R"JSON(
{
"name": "process_name", "pid": %lld, "tid": %lld,
"ph": "M",
"args": {
"name": "Deivce %lld (GPU)"
}
},
{
"name": "thread_name", "pid": %lld, "tid": %lld,
"ph": "M",
"args": {
"name": "stream %lld"
}
},
{
"name": "process_sort_index", "pid": %lld, "tid": %lld,
"ph": "M",
"args": {
"sort_index": %lld
}
},
{
"name": "thread_sort_index", "pid": %lld, "tid": %lld,
"ph": "M",
"args": {
"sort_index": %lld
}
},
)JSON"),
(*it).first, (*it).second, (*it).first, (*it).first, (*it).second,
(*it).second, (*it).first, (*it).second, (*it).first + 0x10000000,
(*it).first, (*it).second, (*it).second);
}
}
void ChromeTracingLogger::EndLog() {
output_file_stream_ << std::string(
R"JSON(
} }
)JSON"); )JSON");
} }
......
...@@ -13,11 +13,18 @@ See the License for the specific language governing permissions and ...@@ -13,11 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <set>
#include <unordered_map>
#include <utility>
#include "paddle/fluid/platform/profiler/output_logger.h" #include "paddle/fluid/platform/profiler/output_logger.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
// Dump a NodeTrees into a chrome tracing file.
// A ChromeTracingLogger object can only dump a NodeTrees object,
// creates a file in the constructor and closes the file in the destructor.
// should only call LogNodeTrees and LogMetaInfo in order.
class ChromeTracingLogger : public BaseLogger { class ChromeTracingLogger : public BaseLogger {
public: public:
explicit ChromeTracingLogger(const std::string& filename); explicit ChromeTracingLogger(const std::string& filename);
...@@ -28,6 +35,7 @@ class ChromeTracingLogger : public BaseLogger { ...@@ -28,6 +35,7 @@ class ChromeTracingLogger : public BaseLogger {
void LogHostTraceEventNode(const HostTraceEventNode&) override; void LogHostTraceEventNode(const HostTraceEventNode&) override;
void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override; void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override;
void LogNodeTrees(const NodeTrees&) override; void LogNodeTrees(const NodeTrees&) override;
void LogMetaInfo(const std::unordered_map<std::string, std::string>);
private: private:
void OpenFile(); void OpenFile();
...@@ -36,9 +44,12 @@ class ChromeTracingLogger : public BaseLogger { ...@@ -36,9 +44,12 @@ class ChromeTracingLogger : public BaseLogger {
void HandleTypeMemcpy(const DeviceTraceEventNode&); void HandleTypeMemcpy(const DeviceTraceEventNode&);
void StartLog(); void StartLog();
void EndLog(); void EndLog();
void RefineDisplayName(std::unordered_map<std::string, std::string>);
std::string filename_; std::string filename_;
std::ofstream output_file_stream_; std::ofstream output_file_stream_;
static const char* categary_name_[]; static const char* categary_name_[];
std::set<std::pair<uint64_t, uint64_t>> pid_tid_set_;
std::set<std::pair<uint64_t, uint64_t>> deviceid_streamid_set_;
}; };
} // namespace platform } // namespace platform
......
...@@ -54,19 +54,16 @@ void CpuUtilization::RecordBeginTimeInfo() { ...@@ -54,19 +54,16 @@ void CpuUtilization::RecordBeginTimeInfo() {
if (stat_file != nullptr) { if (stat_file != nullptr) {
char temp_str[200]; char temp_str[200];
uint64_t temp_lu; uint64_t temp_lu;
while (true) { int retval = fscanf(
int retval = fscanf( stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64
stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64,
"%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, temp_str, &system_tms_start_.tms_utime, &nice_time_start_,
temp_str, &system_tms_start_.tms_utime, &nice_time_start_, &system_tms_start_.tms_stime, &idle_start_, &iowait_start_, &irq_start_,
&system_tms_start_.tms_stime, &idle_start_, &iowait_start_, &softirq_start_, &steal_start_, &temp_lu, &temp_lu);
&irq_start_, &softirq_start_, &steal_start_, &temp_lu, &temp_lu); if (retval != 11) {
if (std::string(temp_str).find("cpu") != 0) { LOG(WARNING)
break; << "Failed to read cpu utilization information at record beginning."
} << std::endl;
if (retval != 11) {
return;
}
} }
fclose(stat_file); fclose(stat_file);
} }
...@@ -90,19 +87,17 @@ void CpuUtilization::RecordEndTimeInfo() { ...@@ -90,19 +87,17 @@ void CpuUtilization::RecordEndTimeInfo() {
if (stat_file != nullptr) { if (stat_file != nullptr) {
char temp_str[200]; char temp_str[200];
uint64_t temp_lu; uint64_t temp_lu;
while (true) { int retval = fscanf(
int retval = fscanf( stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64
stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64,
"%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, temp_str, &system_tms_end_.tms_utime, &nice_time_end_,
temp_str, &system_tms_end_.tms_utime, &nice_time_end_, &system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_,
&system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_, &softirq_end_, &steal_end_, &temp_lu, &temp_lu);
&softirq_end_, &steal_end_, &temp_lu, &temp_lu);
if (std::string(temp_str).find("cpu") != 0) { if (retval != 11) {
break; LOG(WARNING)
} << "Failed to read cpu utilization information at record end."
if (retval != 11) { << std::endl;
return;
}
} }
fclose(stat_file); fclose(stat_file);
} }
......
proto_library(nodetreeproto SRCS nodetree.proto) proto_library(nodetreeproto SRCS nodetree.proto)
cc_library(serialization_logger SRCS serialization_logger.cc DEPS nodetreeproto event_node)
cc_library(deserialization_reader SRCS deserialization_reader.cc DEPS nodetreeproto event_node)
cc_test(test_serialization_logger SRCS test_serialization_logger.cc DEPS serialization_logger deserialization_reader event_node)
...@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include <cstring> #include <cstring>
#include "paddle/fluid/platform/profiler/extra_info.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -36,11 +36,19 @@ void DeserializationReader::OpenFile() { ...@@ -36,11 +36,19 @@ void DeserializationReader::OpenFile() {
} }
} }
std::unique_ptr<NodeTrees> DeserializationReader::Parse() { std::unique_ptr<ProfilerResult> DeserializationReader::Parse() {
if (!node_trees_proto_->ParseFromIstream(&input_file_stream_)) { if (!node_trees_proto_->ParseFromIstream(&input_file_stream_)) {
VLOG(2) << "Unable to load node trees in protobuf." << std::endl; VLOG(2) << "Unable to load node trees in protobuf." << std::endl;
return nullptr; return nullptr;
} }
// restore extra info
ExtraInfo extrainfo;
for (auto indx = 0; indx < node_trees_proto_->extra_info_size(); indx++) {
ExtraInfoMap extra_info_map = node_trees_proto_->extra_info(indx);
extrainfo.AddExtraInfo(extra_info_map.key(), std::string("%s"),
extra_info_map.value().c_str());
}
// restore NodeTrees
std::map<uint64_t, HostTraceEventNode*> thread_event_trees_map; std::map<uint64_t, HostTraceEventNode*> thread_event_trees_map;
for (int node_tree_index = 0; for (int node_tree_index = 0;
node_tree_index < node_trees_proto_->thread_trees_size(); node_tree_index < node_trees_proto_->thread_trees_size();
...@@ -95,7 +103,9 @@ std::unique_ptr<NodeTrees> DeserializationReader::Parse() { ...@@ -95,7 +103,9 @@ std::unique_ptr<NodeTrees> DeserializationReader::Parse() {
} }
} }
// restore NodeTrees object // restore NodeTrees object
return std::unique_ptr<NodeTrees>(new NodeTrees(thread_event_trees_map)); std::unique_ptr<NodeTrees> tree(new NodeTrees(thread_event_trees_map));
return std::unique_ptr<ProfilerResult>(
new ProfilerResult(std::move(tree), extrainfo));
} }
DeserializationReader::~DeserializationReader() { DeserializationReader::~DeserializationReader() {
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include "paddle/fluid/platform/profiler/dump/nodetree.pb.h" #include "paddle/fluid/platform/profiler/dump/nodetree.pb.h"
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_python.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -24,7 +24,7 @@ class DeserializationReader { ...@@ -24,7 +24,7 @@ class DeserializationReader {
explicit DeserializationReader(const std::string& filename); explicit DeserializationReader(const std::string& filename);
explicit DeserializationReader(const char* filename); explicit DeserializationReader(const char* filename);
~DeserializationReader(); ~DeserializationReader();
std::unique_ptr<NodeTrees> Parse(); std::unique_ptr<ProfilerResult> Parse();
private: private:
void OpenFile(); void OpenFile();
......
...@@ -32,9 +32,21 @@ enum TracerEventTypeProto { ...@@ -32,9 +32,21 @@ enum TracerEventTypeProto {
Memset = 6; Memset = 6;
// Used to mark record defined by user // Used to mark record defined by user
UserDefined = 7; UserDefined = 7;
// A flag to denote the number of current types // Used to mark operator detail, (such as infer shape, compute)
NumTypes = 8; OperatorInner = 8;
} // Used to mark model training or testing perspective, forward process
Forward = 9;
// Used to mark model training perspective, backward process
Backward = 10;
// Used to mark model training perspective, optimization process
Optimization = 11;
// Used to mark distributed training perspective
Communication = 12;
// Used to mark python api
PythonOp = 13;
// Used to mark python level userdefined
PythonUserDefined = 14;
};
message KernelEventInfoProto { message KernelEventInfoProto {
// The X-dimension block size for the kernel. // The X-dimension block size for the kernel.
...@@ -175,7 +187,14 @@ message ThreadNodeTreeProto { ...@@ -175,7 +187,14 @@ message ThreadNodeTreeProto {
repeated HostTraceEventNodeProto host_nodes = 2; repeated HostTraceEventNodeProto host_nodes = 2;
} }
message ExtraInfoMap {
required string key = 1;
required string value = 2;
}
message NodeTreesProto { message NodeTreesProto {
required string version = 1; required string version = 1;
repeated ThreadNodeTreeProto thread_trees = 2; required uint32 span_indx = 2;
repeated ThreadNodeTreeProto thread_trees = 3;
repeated ExtraInfoMap extra_info = 4;
} }
...@@ -13,6 +13,7 @@ limitations under the License. */ ...@@ -13,6 +13,7 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/extra_info.h"
#include "paddle/fluid/platform/profiler/utils.h" #include "paddle/fluid/platform/profiler/utils.h"
namespace paddle { namespace paddle {
...@@ -20,6 +21,7 @@ namespace platform { ...@@ -20,6 +21,7 @@ namespace platform {
static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.pb"; static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.pb";
static const char* version = "1.0.0"; static const char* version = "1.0.0";
static uint32_t span_indx = 0;
static std::string DefaultFileName() { static std::string DefaultFileName() {
auto pid = GetProcessId(); auto pid = GetProcessId();
...@@ -39,6 +41,7 @@ void SerializationLogger::OpenFile() { ...@@ -39,6 +41,7 @@ void SerializationLogger::OpenFile() {
} }
node_trees_proto_ = new NodeTreesProto(); node_trees_proto_ = new NodeTreesProto();
node_trees_proto_->set_version(std::string(version)); node_trees_proto_->set_version(std::string(version));
node_trees_proto_->set_span_indx(span_indx++);
} }
void SerializationLogger::LogNodeTrees(const NodeTrees& node_trees) { void SerializationLogger::LogNodeTrees(const NodeTrees& node_trees) {
...@@ -240,6 +243,15 @@ void SerializationLogger::HandleTypeMemset( ...@@ -240,6 +243,15 @@ void SerializationLogger::HandleTypeMemset(
device_trace_event); device_trace_event);
} }
void SerializationLogger::LogMetaInfo(
const std::unordered_map<std::string, std::string> extra_info) {
for (const auto& kv : extra_info) {
ExtraInfoMap* extra_info_map = node_trees_proto_->add_extra_info();
extra_info_map->set_key(kv.first);
extra_info_map->set_value(kv.second);
}
}
SerializationLogger::SerializationLogger(const std::string& filename) { SerializationLogger::SerializationLogger(const std::string& filename) {
filename_ = filename.empty() ? DefaultFileName() : filename; filename_ = filename.empty() ? DefaultFileName() : filename;
OpenFile(); OpenFile();
......
...@@ -11,6 +11,8 @@ limitations under the License. */ ...@@ -11,6 +11,8 @@ limitations under the License. */
#pragma once #pragma once
#include <unordered_map>
#include "paddle/fluid/platform/profiler/dump/nodetree.pb.h" #include "paddle/fluid/platform/profiler/dump/nodetree.pb.h"
#include "paddle/fluid/platform/profiler/output_logger.h" #include "paddle/fluid/platform/profiler/output_logger.h"
...@@ -20,6 +22,7 @@ namespace platform { ...@@ -20,6 +22,7 @@ namespace platform {
// Dump a NodeTrees into a profobuf file. // Dump a NodeTrees into a profobuf file.
// A SerializationLogger object can only dump a NodeTrees object, // A SerializationLogger object can only dump a NodeTrees object,
// creates a file in the constructor and closes the file in the destructor. // creates a file in the constructor and closes the file in the destructor.
// Should only call LogNodeTrees and LogMetaInfo.
class SerializationLogger : public BaseLogger { class SerializationLogger : public BaseLogger {
public: public:
explicit SerializationLogger(const std::string& filename); explicit SerializationLogger(const std::string& filename);
...@@ -30,12 +33,14 @@ class SerializationLogger : public BaseLogger { ...@@ -30,12 +33,14 @@ class SerializationLogger : public BaseLogger {
void LogHostTraceEventNode(const HostTraceEventNode&) override; void LogHostTraceEventNode(const HostTraceEventNode&) override;
void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override; void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override;
void LogNodeTrees(const NodeTrees&) override; void LogNodeTrees(const NodeTrees&) override;
void LogMetaInfo(const std::unordered_map<std::string, std::string>);
private: private:
void OpenFile(); void OpenFile();
void HandleTypeKernel(const DeviceTraceEventNode&); void HandleTypeKernel(const DeviceTraceEventNode&);
void HandleTypeMemset(const DeviceTraceEventNode&); void HandleTypeMemset(const DeviceTraceEventNode&);
void HandleTypeMemcpy(const DeviceTraceEventNode&); void HandleTypeMemcpy(const DeviceTraceEventNode&);
std::string filename_; std::string filename_;
std::ofstream output_file_stream_; std::ofstream output_file_stream_;
NodeTreesProto* node_trees_proto_; NodeTreesProto* node_trees_proto_;
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/event_python.h"
using paddle::platform::SerializationLogger; using paddle::platform::SerializationLogger;
using paddle::platform::DeserializationReader; using paddle::platform::DeserializationReader;
...@@ -31,6 +32,7 @@ using paddle::platform::TracerEventType; ...@@ -31,6 +32,7 @@ using paddle::platform::TracerEventType;
using paddle::platform::KernelEventInfo; using paddle::platform::KernelEventInfo;
using paddle::platform::MemcpyEventInfo; using paddle::platform::MemcpyEventInfo;
using paddle::platform::MemsetEventInfo; using paddle::platform::MemsetEventInfo;
using paddle::platform::ProfilerResult;
TEST(SerializationLoggerTest, dump_case0) { TEST(SerializationLoggerTest, dump_case0) {
std::list<HostTraceEvent> host_events; std::list<HostTraceEvent> host_events;
...@@ -149,7 +151,8 @@ TEST(SerializationLoggerTest, dump_case1) { ...@@ -149,7 +151,8 @@ TEST(SerializationLoggerTest, dump_case1) {
TEST(DeserializationReaderTest, restore_case0) { TEST(DeserializationReaderTest, restore_case0) {
DeserializationReader reader("test_serialization_logger_case0.pb"); DeserializationReader reader("test_serialization_logger_case0.pb");
std::unique_ptr<NodeTrees> tree = reader.Parse(); auto profiler_result = reader.Parse();
auto& tree = profiler_result->GetNodeTrees();
std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes = std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
tree->Traverse(true); tree->Traverse(true);
EXPECT_EQ(nodes[10].size(), 4u); EXPECT_EQ(nodes[10].size(), 4u);
...@@ -172,3 +175,26 @@ TEST(DeserializationReaderTest, restore_case0) { ...@@ -172,3 +175,26 @@ TEST(DeserializationReaderTest, restore_case0) {
} }
} }
} }
TEST(DeserializationReaderTest, restore_case1) {
DeserializationReader reader("test_serialization_logger_case1.pb");
auto profiler_result = reader.Parse();
auto& tree = profiler_result->GetNodeTrees();
std::map<uint64_t, std::vector<HostTraceEventNode*>> nodes =
tree->Traverse(true);
EXPECT_EQ(nodes[10].size(), 1u);
EXPECT_EQ(nodes[11].size(), 1u);
std::vector<HostTraceEventNode*> thread1_nodes = nodes[10];
std::vector<HostTraceEventNode*> thread2_nodes = nodes[11];
for (auto it = thread1_nodes.begin(); it != thread1_nodes.end(); it++) {
if ((*it)->Name() == "root node") {
EXPECT_EQ((*it)->GetRuntimeTraceEventNodes().size(), 3u);
}
}
for (auto it = thread2_nodes.begin(); it != thread2_nodes.end(); it++) {
if ((*it)->Name() == "root node") {
EXPECT_EQ((*it)->GetChildren().size(), 0u);
EXPECT_EQ((*it)->GetRuntimeTraceEventNodes().size(), 2u);
}
}
}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler/event_python.h"
#include "paddle/fluid/platform/profiler/chrometracing_logger.h"
#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h"
#include "paddle/fluid/platform/profiler/dump/serialization_logger.h"
#include "paddle/fluid/platform/profiler/extra_info.h"
namespace paddle {
namespace platform {
HostPythonNode::~HostPythonNode() {
// delete all runtime or device nodes and recursive delete children
for (auto it = children_node_ptrs.begin(); it != children_node_ptrs.end();
++it) {
delete *it;
}
for (auto it = runtime_node_ptrs.begin(); it != runtime_node_ptrs.end();
++it) {
delete *it;
}
for (auto it = device_node_ptrs.begin(); it != device_node_ptrs.end(); ++it) {
delete *it;
}
}
HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) {
// Copy and transfer EventNode in NodeTree to PythonNode
if (root == nullptr) {
return nullptr;
}
// copy HostTraceEventNode and its children
HostPythonNode* host_python_node = new HostPythonNode();
host_python_node->name = root->Name();
host_python_node->type = root->Type();
host_python_node->start_ns = root->StartNs();
host_python_node->end_ns = root->EndNs();
host_python_node->process_id = root->ProcessId();
host_python_node->thread_id = root->ThreadId();
for (auto it = root->GetChildren().begin(); it != root->GetChildren().end();
++it) {
host_python_node->children_node_ptrs.push_back(CopyTree(*it));
}
// copy its CudaRuntimeTraceEventNode
for (auto runtimenode = root->GetRuntimeTraceEventNodes().begin();
runtimenode != root->GetRuntimeTraceEventNodes().end(); ++runtimenode) {
HostPythonNode* runtime_python_node = new HostPythonNode();
runtime_python_node->name = (*runtimenode)->Name();
runtime_python_node->type = (*runtimenode)->Type();
runtime_python_node->start_ns = (*runtimenode)->StartNs();
runtime_python_node->end_ns = (*runtimenode)->EndNs();
runtime_python_node->process_id = (*runtimenode)->ProcessId();
runtime_python_node->thread_id = (*runtimenode)->ThreadId();
host_python_node->runtime_node_ptrs.push_back(runtime_python_node);
// copy DeviceTraceEventNode
for (auto devicenode = (*runtimenode)->GetDeviceTraceEventNodes().begin();
devicenode != (*runtimenode)->GetDeviceTraceEventNodes().end();
++devicenode) {
DevicePythonNode* device_python_node = new DevicePythonNode();
device_python_node->name = (*devicenode)->Name();
device_python_node->type = (*devicenode)->Type();
device_python_node->start_ns = (*devicenode)->StartNs();
device_python_node->end_ns = (*devicenode)->EndNs();
device_python_node->device_id = (*devicenode)->DeviceId();
device_python_node->context_id = (*devicenode)->ContextId();
device_python_node->stream_id = (*devicenode)->StreamId();
runtime_python_node->device_node_ptrs.push_back(device_python_node);
}
}
return host_python_node;
}
ProfilerResult::ProfilerResult(std::unique_ptr<NodeTrees> tree,
const ExtraInfo& extra_info)
: tree_(std::move(tree)), extra_info_(extra_info) {
if (tree_ != nullptr) {
std::map<uint64_t, HostTraceEventNode*> nodetrees = tree_->GetNodeTrees();
for (auto it = nodetrees.begin(); it != nodetrees.end(); ++it) {
thread_event_trees_map_[it->first] = CopyTree(it->second);
}
}
}
ProfilerResult::~ProfilerResult() {
// delete all root nodes
for (auto it = thread_event_trees_map_.begin();
it != thread_event_trees_map_.end(); ++it) {
delete it->second;
}
}
void ProfilerResult::Save(const std::string& file_name,
const std::string format) {
if (format == std::string("json")) {
ChromeTracingLogger logger(file_name);
tree_->LogMe(&logger);
logger.LogMetaInfo(GetExtraInfo());
} else if (format == std::string("pb")) {
SerializationLogger logger(file_name);
tree_->LogMe(&logger);
logger.LogMetaInfo(GetExtraInfo());
}
return;
}
std::unique_ptr<ProfilerResult> LoadProfilerResult(std::string filename) {
DeserializationReader reader(filename);
std::unique_ptr<ProfilerResult> result = reader.Parse();
return result;
}
} // namespace platform
} // namespace paddle
...@@ -15,8 +15,11 @@ limitations under the License. */ ...@@ -15,8 +15,11 @@ limitations under the License. */
#pragma once #pragma once
#include <map> #include <map>
#include <memory>
#include <unordered_map>
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/extra_info.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -66,18 +69,29 @@ struct HostPythonNode { ...@@ -66,18 +69,29 @@ struct HostPythonNode {
class ProfilerResult { class ProfilerResult {
public: public:
ProfilerResult() : tree_(nullptr) {} ProfilerResult() : tree_(nullptr) {}
explicit ProfilerResult(NodeTrees* tree); explicit ProfilerResult(std::unique_ptr<NodeTrees> tree,
const ExtraInfo& extra_info);
~ProfilerResult(); ~ProfilerResult();
std::map<uint64_t, HostPythonNode*> GetData() { std::map<uint64_t, HostPythonNode*> GetData() {
return thread_event_trees_map; return thread_event_trees_map_;
} }
void Save(const std::string& file_name); std::unordered_map<std::string, std::string> GetExtraInfo() {
return extra_info_.GetExtraInfo();
}
void Save(const std::string& file_name,
const std::string format = std::string("json"));
std::unique_ptr<NodeTrees>& GetNodeTrees() { return tree_; }
private: private:
std::map<uint64_t, HostPythonNode*> thread_event_trees_map; std::map<uint64_t, HostPythonNode*> thread_event_trees_map_;
NodeTrees* tree_; std::unique_ptr<NodeTrees> tree_;
HostPythonNode* CopyTree(HostTraceEventNode* node); ExtraInfo extra_info_;
HostPythonNode* CopyTree(HostTraceEventNode* root);
}; };
std::unique_ptr<ProfilerResult> LoadProfilerResult(std::string filename);
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -25,8 +25,10 @@ ...@@ -25,8 +25,10 @@
#endif #endif
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/cuda_tracer.h" #include "paddle/fluid/platform/profiler/cuda_tracer.h"
#include "paddle/fluid/platform/profiler/extra_info.h"
#include "paddle/fluid/platform/profiler/host_tracer.h" #include "paddle/fluid/platform/profiler/host_tracer.h"
#include "paddle/fluid/platform/profiler/trace_event_collector.h" #include "paddle/fluid/platform/profiler/trace_event_collector.h"
#include "paddle/fluid/platform/profiler/utils.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -44,10 +46,15 @@ std::unique_ptr<Profiler> Profiler::Create(const ProfilerOptions& options) { ...@@ -44,10 +46,15 @@ std::unique_ptr<Profiler> Profiler::Create(const ProfilerOptions& options) {
Profiler::Profiler(const ProfilerOptions& options) { Profiler::Profiler(const ProfilerOptions& options) {
options_ = options; options_ = options;
HostTracerOptions host_tracer_options; std::bitset<32> trace_switch(options_.trace_switch);
host_tracer_options.trace_level = options.trace_level; if (trace_switch.test(kProfileCPUOptionBit)) {
tracers_.emplace_back(new HostTracer(host_tracer_options), true); HostTracerOptions host_tracer_options;
tracers_.emplace_back(&CudaTracer::GetInstance(), false); host_tracer_options.trace_level = options_.trace_level;
tracers_.emplace_back(new HostTracer(host_tracer_options), true);
}
if (trace_switch.test(kProfileGPUOptionBit)) {
tracers_.emplace_back(&CudaTracer::GetInstance(), false);
}
} }
Profiler::~Profiler() { alive_.store(false); } Profiler::~Profiler() { alive_.store(false); }
...@@ -63,9 +70,10 @@ void Profiler::Start() { ...@@ -63,9 +70,10 @@ void Profiler::Start() {
for (auto& tracer : tracers_) { for (auto& tracer : tracers_) {
tracer.Get().StartTracing(); tracer.Get().StartTracing();
} }
cpu_utilization_.RecordBeginTimeInfo();
} }
std::unique_ptr<NodeTrees> Profiler::Stop() { std::unique_ptr<ProfilerResult> Profiler::Stop() {
SynchronizeAllDevice(); SynchronizeAllDevice();
TraceEventCollector collector; TraceEventCollector collector;
for (auto& tracer : tracers_) { for (auto& tracer : tracers_) {
...@@ -75,7 +83,22 @@ std::unique_ptr<NodeTrees> Profiler::Stop() { ...@@ -75,7 +83,22 @@ std::unique_ptr<NodeTrees> Profiler::Stop() {
std::unique_ptr<NodeTrees> tree(new NodeTrees(collector.HostEvents(), std::unique_ptr<NodeTrees> tree(new NodeTrees(collector.HostEvents(),
collector.RuntimeEvents(), collector.RuntimeEvents(),
collector.DeviceEvents())); collector.DeviceEvents()));
return tree; cpu_utilization_.RecordEndTimeInfo();
ExtraInfo extrainfo;
extrainfo.AddExtraInfo(std::string("System Cpu Utilization"),
std::string("%f"),
cpu_utilization_.GetCpuUtilization());
extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"),
std::string("%f"),
cpu_utilization_.GetCpuCurProcessUtilization());
const std::unordered_map<uint64_t, std::string> thread_names =
collector.ThreadNames();
for (const auto& kv : thread_names) {
extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first),
kv.second);
}
return std::unique_ptr<ProfilerResult>(
new platform::ProfilerResult(std::move(tree), extrainfo));
} }
} // namespace platform } // namespace platform
......
...@@ -15,12 +15,15 @@ ...@@ -15,12 +15,15 @@
#pragma once #pragma once
#include <atomic> #include <atomic>
#include <bitset>
#include <cstdint> #include <cstdint>
#include <functional> #include <functional>
#include <list> #include <list>
#include <memory> #include <memory>
#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/profiler/cpu_utilization.h"
#include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_node.h"
#include "paddle/fluid/platform/profiler/event_python.h"
#include "paddle/fluid/platform/profiler/tracer_base.h" #include "paddle/fluid/platform/profiler/tracer_base.h"
DECLARE_int64(host_trace_level); DECLARE_int64(host_trace_level);
...@@ -28,7 +31,11 @@ DECLARE_int64(host_trace_level); ...@@ -28,7 +31,11 @@ DECLARE_int64(host_trace_level);
namespace paddle { namespace paddle {
namespace platform { namespace platform {
static constexpr uint32_t kProfileCPUOptionBit = 0;
static constexpr uint32_t kProfileGPUOptionBit = 1;
struct ProfilerOptions { struct ProfilerOptions {
uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu
uint32_t trace_level = FLAGS_host_trace_level; uint32_t trace_level = FLAGS_host_trace_level;
}; };
...@@ -40,7 +47,7 @@ class Profiler { ...@@ -40,7 +47,7 @@ class Profiler {
void Start(); void Start();
std::unique_ptr<NodeTrees> Stop(); std::unique_ptr<ProfilerResult> Stop();
~Profiler(); ~Profiler();
...@@ -70,6 +77,7 @@ class Profiler { ...@@ -70,6 +77,7 @@ class Profiler {
ProfilerOptions options_; ProfilerOptions options_;
uint64_t start_ns_ = UINT64_MAX; uint64_t start_ns_ = UINT64_MAX;
std::list<TracerHolder> tracers_; std::list<TracerHolder> tracers_;
CpuUtilization cpu_utilization_;
}; };
} // namespace platform } // namespace platform
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#ifdef PADDLE_WITH_HIP #ifdef PADDLE_WITH_HIP
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#endif #endif
#include "paddle/fluid/platform/profiler/event_python.h"
#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/profiler.h" #include "paddle/fluid/platform/profiler/profiler.h"
...@@ -30,8 +31,10 @@ TEST(ProfilerTest, TestHostTracer) { ...@@ -30,8 +31,10 @@ TEST(ProfilerTest, TestHostTracer) {
using paddle::platform::Profiler; using paddle::platform::Profiler;
using paddle::platform::RecordInstantEvent; using paddle::platform::RecordInstantEvent;
using paddle::platform::TracerEventType; using paddle::platform::TracerEventType;
using paddle::platform::ProfilerResult;
ProfilerOptions options; ProfilerOptions options;
options.trace_level = 2; options.trace_level = 2;
options.trace_switch = 3;
auto profiler = Profiler::Create(options); auto profiler = Profiler::Create(options);
EXPECT_TRUE(profiler); EXPECT_TRUE(profiler);
profiler->Prepare(); profiler->Prepare();
...@@ -42,7 +45,8 @@ TEST(ProfilerTest, TestHostTracer) { ...@@ -42,7 +45,8 @@ TEST(ProfilerTest, TestHostTracer) {
RecordInstantEvent("TestTraceLevel_record2", TracerEventType::UserDefined, RecordInstantEvent("TestTraceLevel_record2", TracerEventType::UserDefined,
3); 3);
} }
auto nodetree = profiler->Stop(); auto profiler_result = profiler->Stop();
auto& nodetree = profiler_result->GetNodeTrees();
std::set<std::string> host_events; std::set<std::string> host_events;
for (const auto pair : nodetree->Traverse(true)) { for (const auto pair : nodetree->Traverse(true)) {
for (const auto evt : pair.second) { for (const auto evt : pair.second) {
...@@ -56,8 +60,10 @@ TEST(ProfilerTest, TestHostTracer) { ...@@ -56,8 +60,10 @@ TEST(ProfilerTest, TestHostTracer) {
TEST(ProfilerTest, TestCudaTracer) { TEST(ProfilerTest, TestCudaTracer) {
using paddle::platform::ProfilerOptions; using paddle::platform::ProfilerOptions;
using paddle::platform::Profiler; using paddle::platform::Profiler;
using paddle::platform::ProfilerResult;
ProfilerOptions options; ProfilerOptions options;
options.trace_level = 0; options.trace_level = 0;
options.trace_switch = 3;
auto profiler = Profiler::Create(options); auto profiler = Profiler::Create(options);
EXPECT_TRUE(profiler); EXPECT_TRUE(profiler);
profiler->Prepare(); profiler->Prepare();
...@@ -72,7 +78,8 @@ TEST(ProfilerTest, TestCudaTracer) { ...@@ -72,7 +78,8 @@ TEST(ProfilerTest, TestCudaTracer) {
hipStreamCreate(&stream); hipStreamCreate(&stream);
hipStreamSynchronize(stream); hipStreamSynchronize(stream);
#endif #endif
auto nodetree = profiler->Stop(); auto profiler_result = profiler->Stop();
auto& nodetree = profiler_result->GetNodeTrees();
std::vector<std::string> runtime_events; std::vector<std::string> runtime_events;
for (const auto pair : nodetree->Traverse(true)) { for (const auto pair : nodetree->Traverse(true)) {
for (const auto host_node : pair.second) { for (const auto host_node : pair.second) {
......
...@@ -48,6 +48,8 @@ enum class TracerEventType { ...@@ -48,6 +48,8 @@ enum class TracerEventType {
Communication = 12, Communication = 12,
// Used to mark python api // Used to mark python api
PythonOp = 13, PythonOp = 13,
// Used to mark python level userdefined
PythonUserDefined = 14,
// A flag to denote the number of current types // A flag to denote the number of current types
NumTypes NumTypes
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册