diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index b12ad552aba6e6e599689c05c23ae306110aa78f..b91ee3c2d633dff148e9118ea5c3bea50768ca89 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -264,10 +264,10 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { // and different op name cost time,we set two event. platform::RecordEvent op_type_record_event( Type(), platform::TracerEventType::Operator, 1); - // auto op_name = platform::OpName(outputs_, Type()); - // platform::RecordEvent op_name_record_event( - // op_name, platform::TracerEventType::Operator, 1, - // platform::EventRole::kUniqueOp); + auto op_name = platform::OpName(outputs_, Type()); + platform::RecordEvent op_name_record_event( + op_name, platform::TracerEventType::Operator, 10, + platform::EventRole::kUniqueOp); RunImpl(scope, place); } diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt index 5acdfa39569f037fb0db5fbb0037f6ce42d2bac0..c903a52530ccba8a6e0c0ccca27771617f488025 100755 --- a/paddle/fluid/platform/profiler/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -2,10 +2,12 @@ cc_library(host_tracer SRCS host_tracer.cc DEPS enforce) cc_library(cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc DEPS workqueue_utils enforce glog) cc_library(event_node SRCS event_node.cc DEPS enforce) cc_library(profiler_utils SRCS utils.cc DEPS enforce glog) -cc_library(chrometracinglogger SRCS chrometracing_logger.cc DEPS event_node profiler_utils) -cc_test(test_event_node SRCS test_event_node.cc DEPS event_node chrometracinglogger) add_subdirectory(dump) +cc_library(profiler_logger SRCS chrometracing_logger.cc dump/serialization_logger.cc dump/deserialization_reader.cc DEPS nodetreeproto event_node profiler_utils) +cc_library(event_bind SRCS event_python.cc DEPS profiler_logger) cc_library(cpu_utilization SRCS cpu_utilization.cc DEPS cpu_info os_info enforce glog) +cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer profiler_utils cpu_utilization event_bind) +cc_test(test_event_node SRCS test_event_node.cc DEPS event_node profiler_logger) cc_test(test_extra_info SRCS test_extra_info.cc DEPS profiler_utils) -cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer profiler_utils cpu_utilization) -cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler event_node) +cc_test(test_serialization_logger SRCS dump/test_serialization_logger.cc DEPS event_bind) +cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler) diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc index 7b207ea7b20118064f447ad9bec83b0b0b793ca0..4061e2d4d494d55c703e411e692200d1fd28ba5b 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.cc +++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc @@ -18,40 +18,17 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/os_info.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler/chrometracing_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/utils.h" namespace paddle { namespace platform { static const char* kSchemaVersion = "1.0.0"; static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.json"; -static uint32_t num_span = 0; - -static int64_t nsToUs(int64_t ns) { return ns / 1000; } - -template -std::string string_format(const std::string& format, Args... args) { - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + - 1; // Extra space for '\0' - PADDLE_ENFORCE_GE(size_s, 0, platform::errors::Fatal( - "Error during profiler data formatting.")); - auto size = static_cast(size_s); - auto buf = std::make_unique(size); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), size - 1); // exclude the '\0' -} - -std::string GetStringFormatLocalTime() { - std::time_t rawtime; - std::tm* timeinfo; - char buf[100]; - std::time(&rawtime); - timeinfo = std::localtime(&rawtime); - std::strftime(buf, 100, "%F-%X", timeinfo); - return std::string(buf); -} +static uint32_t span_indx = 0; static std::string DefaultFileName() { auto pid = GetProcessId(); @@ -60,16 +37,19 @@ static std::string DefaultFileName() { } const char* ChromeTracingLogger::categary_name_[] = { - "operator", "dataloader", "profile_step", "cuda_runtime", "kernel", - "memcpy", "memset", "user_defined", "others"}; + "Operator", "Dataloader", "ProfileStep", "CudaRuntime", + "Kernel", "Memcpy", "Memset", "UserDefined", + "OperatorInner", "Forward", "Backward", "Optimization", + "Communication", "PythonOp", "PythonUserDefined"}; void ChromeTracingLogger::OpenFile() { output_file_stream_.open(filename_, std::ofstream::out | std::ofstream::trunc); if (!output_file_stream_) { - VLOG(2) << "Unable to open file for writing profiling data." << std::endl; + LOG(WARNING) << "Unable to open file for writing profiling data." + << std::endl; } else { - VLOG(0) << "writing profiling data to " << filename_ << std::endl; + LOG(INFO) << "writing profiling data to " << filename_ << std::endl; } } @@ -122,21 +102,54 @@ void ChromeTracingLogger::LogHostTraceEventNode( if (!output_file_stream_) { return; } - output_file_stream_ << string_format( - std::string( - R"JSON( + switch (host_node.Type()) { + case TracerEventType::ProfileStep: + case TracerEventType::Forward: + case TracerEventType::Backward: + case TracerEventType::Dataloader: + case TracerEventType::Optimization: + case TracerEventType::PythonOp: + case TracerEventType::PythonUserDefined: + output_file_stream_ << string_format( + std::string( + R"JSON( { - "name": "%s", "pid": %lld, "tid": %lld, + "name": "%s", "pid": %lld, "tid": "%lld(Python)", "ts": %lld, "dur": %lld, "ph": "X", "cat": "%s", "args": { - + "start_ns": %lld, + "end_ns": %lld } }, )JSON"), - host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), - nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), - categary_name_[static_cast(host_node.Type())]); + host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), + nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), + categary_name_[static_cast(host_node.Type())], + host_node.StartNs(), host_node.EndNs()); + break; + default: + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "%s", "pid": %lld, "tid": "%lld(C++)", + "ts": %lld, "dur": %lld, + "ph": "X", "cat": "%s", + "args": { + "start_ns": %lld, + "end_ns": %lld + } + }, + )JSON"), + host_node.Name().c_str(), host_node.ProcessId(), host_node.ThreadId(), + nsToUs(host_node.StartNs()), nsToUs(host_node.Duration()), + categary_name_[static_cast(host_node.Type())], + host_node.StartNs(), host_node.EndNs()); + break; + } + + pid_tid_set_.insert({host_node.ProcessId(), host_node.ThreadId()}); } void ChromeTracingLogger::LogRuntimeTraceEventNode( @@ -148,11 +161,13 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode( std::string( R"JSON( { - "name": "%s", "pid": %lld, "tid": %lld, + "name": "%s", "pid": %lld, "tid": "%lld(C++)", "ts": %lld, "dur": %lld, "ph": "X", "cat": "%s", "args": { - "correlation id": %d + "correlation id": %d, + "start_ns": %lld, + "end_ns": %lld } }, )JSON"), @@ -160,7 +175,23 @@ void ChromeTracingLogger::LogRuntimeTraceEventNode( runtime_node.ThreadId(), nsToUs(runtime_node.StartNs()), nsToUs(runtime_node.Duration()), categary_name_[static_cast(runtime_node.Type())], - runtime_node.CorrelationId()); + runtime_node.CorrelationId(), runtime_node.StartNs(), + runtime_node.EndNs()); + pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()}); + + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "launch", "id": %d, "pid": %lld, "tid": "%lld(C++)", + "ts": %lld, + "ph": "s", "cat": "async" + }, + )JSON"), + runtime_node.CorrelationId(), runtime_node.ProcessId(), + runtime_node.ThreadId(), + nsToUs((runtime_node.StartNs() + runtime_node.EndNs()) >> 1)); + pid_tid_set_.insert({runtime_node.ProcessId(), runtime_node.ThreadId()}); } void ChromeTracingLogger::LogDeviceTraceEventNode( @@ -180,6 +211,36 @@ void ChromeTracingLogger::LogDeviceTraceEventNode( default: break; } + if (nsToUs(device_node.Duration()) == 0) { + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "launch", "id": %d, "pid": %lld, "tid": %lld, + "ts": %lld, + "ph": "f", "cat": "async" + }, + )JSON"), + device_node.CorrelationId(), device_node.DeviceId(), + device_node.StreamId(), nsToUs(device_node.StartNs())); + deviceid_streamid_set_.insert( + {device_node.DeviceId(), device_node.StreamId()}); + } else { + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "launch", "id": %d, "pid": %lld, "tid": %lld, + "ts": %lld, + "ph": "f", "cat": "async", "bp": "e" + }, + )JSON"), + device_node.CorrelationId(), device_node.DeviceId(), + device_node.StreamId(), + nsToUs((device_node.StartNs() + device_node.EndNs()) >> 1)); + deviceid_streamid_set_.insert( + {device_node.DeviceId(), device_node.StreamId()}); + } } void ChromeTracingLogger::HandleTypeKernel( @@ -188,16 +249,21 @@ void ChromeTracingLogger::HandleTypeKernel( float blocks_per_sm = 0.0; float warps_per_sm = 0.0; float occupancy = 0.0; -#if defined(PADDLE_WITH_CUDA) +#if defined(PADDLE_WITH_CUPTI) constexpr int threads_per_warp = 32; const gpuDeviceProp& device_property = GetDeviceProperties(device_node.DeviceId()); - blocks_per_sm = - (kernel_info.grid_x * kernel_info.grid_y * kernel_info.grid_z) / - device_property.multiProcessorCount; + blocks_per_sm = static_cast(kernel_info.grid_x * kernel_info.grid_y * + kernel_info.grid_z) / + device_property.multiProcessorCount; warps_per_sm = blocks_per_sm * (kernel_info.block_x * kernel_info.block_y * kernel_info.block_z) / threads_per_warp; + occupancy = CalculateEstOccupancy( + device_node.DeviceId(), kernel_info.registers_per_thread, + kernel_info.static_shared_memory, kernel_info.dynamic_shared_memory, + kernel_info.block_x, kernel_info.block_y, kernel_info.block_z, + blocks_per_sm); #endif output_file_stream_ << string_format( @@ -208,15 +274,17 @@ void ChromeTracingLogger::HandleTypeKernel( "ts": %lld, "dur": %lld, "ph": "X", "cat": "%s", "args": { + "start_ns": %lld, + "end_ns": %lld, "device": %d, "context": %d, "stream": %d, "correlation id": %d, "registers per thread": %d, - "shared memory": %f, + "shared memory": %d, "blocks per SM": %f, "warps per SM": %f, "grid": [%d, %d, %d], "block": [%d, %d, %d], - "est. achieved occupancy %": %f + "theoretical achieved occupancy %%": %f } }, )JSON"), @@ -224,12 +292,13 @@ void ChromeTracingLogger::HandleTypeKernel( device_node.StreamId(), nsToUs(device_node.StartNs()), nsToUs(device_node.Duration()), categary_name_[static_cast(device_node.Type())], - device_node.DeviceId(), device_node.ContextId(), device_node.StreamId(), + device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(), + device_node.ContextId(), device_node.StreamId(), device_node.CorrelationId(), kernel_info.registers_per_thread, kernel_info.static_shared_memory + kernel_info.dynamic_shared_memory, blocks_per_sm, warps_per_sm, kernel_info.grid_x, kernel_info.grid_y, kernel_info.grid_z, kernel_info.block_x, kernel_info.block_y, - kernel_info.block_z, occupancy); + kernel_info.block_z, occupancy * 100); } void ChromeTracingLogger::HandleTypeMemcpy( @@ -247,6 +316,8 @@ void ChromeTracingLogger::HandleTypeMemcpy( "ts": %lld, "dur": %lld, "ph": "X", "cat": "%s", "args": { + "start_ns": %lld, + "end_ns": %lld, "stream": %d, "correlation id": %d, "bytes": %d, "memory bandwidth (GB/s)": %f } @@ -256,8 +327,8 @@ void ChromeTracingLogger::HandleTypeMemcpy( device_node.StreamId(), nsToUs(device_node.StartNs()), nsToUs(device_node.Duration()), categary_name_[static_cast(device_node.Type())], - device_node.StreamId(), device_node.CorrelationId(), - memcpy_info.num_bytes, memory_bandwidth); + device_node.StartNs(), device_node.EndNs(), device_node.StreamId(), + device_node.CorrelationId(), memcpy_info.num_bytes, memory_bandwidth); } void ChromeTracingLogger::HandleTypeMemset( @@ -271,6 +342,8 @@ void ChromeTracingLogger::HandleTypeMemset( "ts": %lld, "dur": %lld, "ph": "X", "cat": "%s", "args": { + "start_ns": %lld, + "end_ns": %lld, "device": %d, "context": %d, "stream": %d, "correlation id": %d, "bytes": %d, "value": %d @@ -281,7 +354,8 @@ void ChromeTracingLogger::HandleTypeMemset( device_node.StreamId(), nsToUs(device_node.StartNs()), nsToUs(device_node.Duration()), categary_name_[static_cast(device_node.Type())], - device_node.DeviceId(), device_node.ContextId(), device_node.StreamId(), + device_node.StartNs(), device_node.EndNs(), device_node.DeviceId(), + device_node.ContextId(), device_node.StreamId(), device_node.CorrelationId(), memset_info.num_bytes, memset_info.value); } @@ -290,10 +364,10 @@ void ChromeTracingLogger::StartLog() { R"JSON( { "schemaVersion": "%s", - "displayTimeUnit": "us", - "SpanNumber": "%d", + "displayTimeUnit": "ms", + "span_indx": "%d", )JSON"), - kSchemaVersion, num_span); + kSchemaVersion, span_indx++); // add device property information #if defined(PADDLE_WITH_CUDA) output_file_stream_ << std::string(R"JSON( @@ -358,11 +432,143 @@ void ChromeTracingLogger::StartLog() { )JSON"); } -void ChromeTracingLogger::EndLog() { +void ChromeTracingLogger::LogMetaInfo( + const std::unordered_map extra_info) { + RefineDisplayName(extra_info); output_file_stream_ << std::string( R"JSON( {} - ] + ], + )JSON"); + output_file_stream_ << std::string(R"JSON( + "ExtraInfo": {)JSON"); + size_t count = extra_info.size(); + for (const auto& kv : extra_info) { + if (count > 1) { + output_file_stream_ << string_format(std::string(R"JSON( + "%s": "%s", + )JSON"), + kv.first.c_str(), kv.second.c_str()); + } else { + output_file_stream_ << string_format(std::string(R"JSON( + "%s": "%s" + )JSON"), + kv.first.c_str(), kv.second.c_str()); + } + count--; + } + output_file_stream_ << std::string(R"JSON( + })JSON"); +} + +void ChromeTracingLogger::RefineDisplayName( + std::unordered_map extra_info) { + for (auto it = pid_tid_set_.begin(); it != pid_tid_set_.end(); ++it) { + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "process_name", "pid": %lld, "tid": "%lld(Python)", + "ph": "M", + "args": { + "name": "Process %lld (CPU)" + } + }, + { + "name": "process_name", "pid": %lld, "tid": "%lld(C++)", + "ph": "M", + "args": { + "name": "Process %lld (CPU)" + } + }, + { + "name": "thread_name", "pid": %lld, "tid": "%lld(Python)", + "ph": "M", + "args": { + "name": "thread %lld:%s(Python)" + } + }, + { + "name": "thread_name", "pid": %lld, "tid": "%lld(C++)", + "ph": "M", + "args": { + "name": "thread %lld:%s(C++)" + } + }, + { + "name": "process_sort_index", "pid": %lld, "tid": %lld, + "ph": "M", + "args": { + "sort_index": %lld + } + }, + { + "name": "thread_sort_index", "pid": %lld, "tid": "%lld(Python)", + "ph": "M", + "args": { + "sort_index": %lld + } + }, + { + "name": "thread_sort_index", "pid": %lld, "tid": "%lld(C++)", + "ph": "M", + "args": { + "sort_index": %lld + } + }, + )JSON"), + (*it).first, (*it).second, (*it).first, (*it).first, (*it).second, + (*it).first, (*it).first, (*it).second, (*it).second, + extra_info[string_format(std::string("%lld"), (*it).second)].c_str(), + (*it).first, (*it).second, (*it).second, + extra_info[string_format(std::string("%lld"), (*it).second)].c_str(), + (*it).first, (*it).second, (*it).first, (*it).first, (*it).second, + (*it).second * 2, (*it).first, (*it).second, (*it).second * 2 + 1); + } + + for (auto it = deviceid_streamid_set_.begin(); + it != deviceid_streamid_set_.end(); ++it) { + output_file_stream_ << string_format( + std::string( + R"JSON( + { + "name": "process_name", "pid": %lld, "tid": %lld, + "ph": "M", + "args": { + "name": "Deivce %lld (GPU)" + } + }, + { + "name": "thread_name", "pid": %lld, "tid": %lld, + "ph": "M", + "args": { + "name": "stream %lld" + } + }, + { + "name": "process_sort_index", "pid": %lld, "tid": %lld, + "ph": "M", + "args": { + "sort_index": %lld + } + }, + { + "name": "thread_sort_index", "pid": %lld, "tid": %lld, + "ph": "M", + "args": { + "sort_index": %lld + } + }, + )JSON"), + (*it).first, (*it).second, (*it).first, (*it).first, (*it).second, + (*it).second, (*it).first, (*it).second, (*it).first + 0x10000000, + (*it).first, (*it).second, (*it).second); + } +} + +void ChromeTracingLogger::EndLog() { + output_file_stream_ << std::string( + R"JSON( } )JSON"); } diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.h b/paddle/fluid/platform/profiler/chrometracing_logger.h index 06734418609d7def27c16a5ddd9e15589ee7a02a..20a924a54cabdf94e7aefece6b4aeffcdd848340 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.h +++ b/paddle/fluid/platform/profiler/chrometracing_logger.h @@ -13,11 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include +#include +#include #include "paddle/fluid/platform/profiler/output_logger.h" namespace paddle { namespace platform { +// Dump a NodeTrees into a chrome tracing file. +// A ChromeTracingLogger object can only dump a NodeTrees object, +// creates a file in the constructor and closes the file in the destructor. +// should only call LogNodeTrees and LogMetaInfo in order. class ChromeTracingLogger : public BaseLogger { public: explicit ChromeTracingLogger(const std::string& filename); @@ -28,6 +35,7 @@ class ChromeTracingLogger : public BaseLogger { void LogHostTraceEventNode(const HostTraceEventNode&) override; void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override; void LogNodeTrees(const NodeTrees&) override; + void LogMetaInfo(const std::unordered_map); private: void OpenFile(); @@ -36,9 +44,12 @@ class ChromeTracingLogger : public BaseLogger { void HandleTypeMemcpy(const DeviceTraceEventNode&); void StartLog(); void EndLog(); + void RefineDisplayName(std::unordered_map); std::string filename_; std::ofstream output_file_stream_; static const char* categary_name_[]; + std::set> pid_tid_set_; + std::set> deviceid_streamid_set_; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/cpu_utilization.cc b/paddle/fluid/platform/profiler/cpu_utilization.cc index 672a9a154535a1cb76a4bbc2bde074b6eecefd9e..ce2e49a1ccd39accb8830943759d361d15d12d9d 100644 --- a/paddle/fluid/platform/profiler/cpu_utilization.cc +++ b/paddle/fluid/platform/profiler/cpu_utilization.cc @@ -54,19 +54,16 @@ void CpuUtilization::RecordBeginTimeInfo() { if (stat_file != nullptr) { char temp_str[200]; uint64_t temp_lu; - while (true) { - int retval = fscanf( - stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 - "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, - temp_str, &system_tms_start_.tms_utime, &nice_time_start_, - &system_tms_start_.tms_stime, &idle_start_, &iowait_start_, - &irq_start_, &softirq_start_, &steal_start_, &temp_lu, &temp_lu); - if (std::string(temp_str).find("cpu") != 0) { - break; - } - if (retval != 11) { - return; - } + int retval = fscanf( + stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 + "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, + temp_str, &system_tms_start_.tms_utime, &nice_time_start_, + &system_tms_start_.tms_stime, &idle_start_, &iowait_start_, &irq_start_, + &softirq_start_, &steal_start_, &temp_lu, &temp_lu); + if (retval != 11) { + LOG(WARNING) + << "Failed to read cpu utilization information at record beginning." + << std::endl; } fclose(stat_file); } @@ -90,19 +87,17 @@ void CpuUtilization::RecordEndTimeInfo() { if (stat_file != nullptr) { char temp_str[200]; uint64_t temp_lu; - while (true) { - int retval = fscanf( - stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 - "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, - temp_str, &system_tms_end_.tms_utime, &nice_time_end_, - &system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_, - &softirq_end_, &steal_end_, &temp_lu, &temp_lu); - if (std::string(temp_str).find("cpu") != 0) { - break; - } - if (retval != 11) { - return; - } + int retval = fscanf( + stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 + "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, + temp_str, &system_tms_end_.tms_utime, &nice_time_end_, + &system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_, + &softirq_end_, &steal_end_, &temp_lu, &temp_lu); + + if (retval != 11) { + LOG(WARNING) + << "Failed to read cpu utilization information at record end." + << std::endl; } fclose(stat_file); } diff --git a/paddle/fluid/platform/profiler/dump/CMakeLists.txt b/paddle/fluid/platform/profiler/dump/CMakeLists.txt index e25333f7a8a73864137a85bc64fe28506b86e081..5045c56afbc639848177d5fd41e212d46015e743 100644 --- a/paddle/fluid/platform/profiler/dump/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/dump/CMakeLists.txt @@ -1,4 +1 @@ proto_library(nodetreeproto SRCS nodetree.proto) -cc_library(serialization_logger SRCS serialization_logger.cc DEPS nodetreeproto event_node) -cc_library(deserialization_reader SRCS deserialization_reader.cc DEPS nodetreeproto event_node) -cc_test(test_serialization_logger SRCS test_serialization_logger.cc DEPS serialization_logger deserialization_reader event_node) diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc index d1049a7dc190845dc91013f688a27224f5e26b0e..de3411579d3e9f72959a26e0e870e079e08913e8 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc +++ b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc @@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" - #include +#include "paddle/fluid/platform/profiler/extra_info.h" namespace paddle { namespace platform { @@ -36,11 +36,19 @@ void DeserializationReader::OpenFile() { } } -std::unique_ptr DeserializationReader::Parse() { +std::unique_ptr DeserializationReader::Parse() { if (!node_trees_proto_->ParseFromIstream(&input_file_stream_)) { VLOG(2) << "Unable to load node trees in protobuf." << std::endl; return nullptr; } + // restore extra info + ExtraInfo extrainfo; + for (auto indx = 0; indx < node_trees_proto_->extra_info_size(); indx++) { + ExtraInfoMap extra_info_map = node_trees_proto_->extra_info(indx); + extrainfo.AddExtraInfo(extra_info_map.key(), std::string("%s"), + extra_info_map.value().c_str()); + } + // restore NodeTrees std::map thread_event_trees_map; for (int node_tree_index = 0; node_tree_index < node_trees_proto_->thread_trees_size(); @@ -95,7 +103,9 @@ std::unique_ptr DeserializationReader::Parse() { } } // restore NodeTrees object - return std::unique_ptr(new NodeTrees(thread_event_trees_map)); + std::unique_ptr tree(new NodeTrees(thread_event_trees_map)); + return std::unique_ptr( + new ProfilerResult(std::move(tree), extrainfo)); } DeserializationReader::~DeserializationReader() { diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.h b/paddle/fluid/platform/profiler/dump/deserialization_reader.h index 1ad2dabf229ad5665db6cc9f9ec43470f0b232f3..e6feb4f9489e886e534b3e3f153473dd362c8a9c 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.h +++ b/paddle/fluid/platform/profiler/dump/deserialization_reader.h @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "paddle/fluid/platform/profiler/dump/nodetree.pb.h" -#include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/event_python.h" namespace paddle { namespace platform { @@ -24,7 +24,7 @@ class DeserializationReader { explicit DeserializationReader(const std::string& filename); explicit DeserializationReader(const char* filename); ~DeserializationReader(); - std::unique_ptr Parse(); + std::unique_ptr Parse(); private: void OpenFile(); diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto index 37dac0e597ce208da05271ff88c6f28b3c9dd9f9..7016745059d402f3db485e0a6fef41e0e7e39dae 100644 --- a/paddle/fluid/platform/profiler/dump/nodetree.proto +++ b/paddle/fluid/platform/profiler/dump/nodetree.proto @@ -32,9 +32,21 @@ enum TracerEventTypeProto { Memset = 6; // Used to mark record defined by user UserDefined = 7; - // A flag to denote the number of current types - NumTypes = 8; -} + // Used to mark operator detail, (such as infer shape, compute) + OperatorInner = 8; + // Used to mark model training or testing perspective, forward process + Forward = 9; + // Used to mark model training perspective, backward process + Backward = 10; + // Used to mark model training perspective, optimization process + Optimization = 11; + // Used to mark distributed training perspective + Communication = 12; + // Used to mark python api + PythonOp = 13; + // Used to mark python level userdefined + PythonUserDefined = 14; +}; message KernelEventInfoProto { // The X-dimension block size for the kernel. @@ -175,7 +187,14 @@ message ThreadNodeTreeProto { repeated HostTraceEventNodeProto host_nodes = 2; } +message ExtraInfoMap { + required string key = 1; + required string value = 2; +} + message NodeTreesProto { required string version = 1; - repeated ThreadNodeTreeProto thread_trees = 2; + required uint32 span_indx = 2; + repeated ThreadNodeTreeProto thread_trees = 3; + repeated ExtraInfoMap extra_info = 4; } diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.cc b/paddle/fluid/platform/profiler/dump/serialization_logger.cc index d9ed84bd438a7e2ac95a6637b6efcae870a8ad75..73021f4362af5df52129dac394de94e59f6e5f3b 100644 --- a/paddle/fluid/platform/profiler/dump/serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/extra_info.h" #include "paddle/fluid/platform/profiler/utils.h" namespace paddle { @@ -20,6 +21,7 @@ namespace platform { static const char* kDefaultFilename = "pid_%s_time_%s.paddle_trace.pb"; static const char* version = "1.0.0"; +static uint32_t span_indx = 0; static std::string DefaultFileName() { auto pid = GetProcessId(); @@ -39,6 +41,7 @@ void SerializationLogger::OpenFile() { } node_trees_proto_ = new NodeTreesProto(); node_trees_proto_->set_version(std::string(version)); + node_trees_proto_->set_span_indx(span_indx++); } void SerializationLogger::LogNodeTrees(const NodeTrees& node_trees) { @@ -240,6 +243,15 @@ void SerializationLogger::HandleTypeMemset( device_trace_event); } +void SerializationLogger::LogMetaInfo( + const std::unordered_map extra_info) { + for (const auto& kv : extra_info) { + ExtraInfoMap* extra_info_map = node_trees_proto_->add_extra_info(); + extra_info_map->set_key(kv.first); + extra_info_map->set_value(kv.second); + } +} + SerializationLogger::SerializationLogger(const std::string& filename) { filename_ = filename.empty() ? DefaultFileName() : filename; OpenFile(); diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.h b/paddle/fluid/platform/profiler/dump/serialization_logger.h old mode 100755 new mode 100644 index 1295be95d45316d6884b68b3115caefa7905d673..378834cff590d03090ca9030eb962a801abb9c4d --- a/paddle/fluid/platform/profiler/dump/serialization_logger.h +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.h @@ -11,6 +11,8 @@ limitations under the License. */ #pragma once +#include + #include "paddle/fluid/platform/profiler/dump/nodetree.pb.h" #include "paddle/fluid/platform/profiler/output_logger.h" @@ -20,6 +22,7 @@ namespace platform { // Dump a NodeTrees into a profobuf file. // A SerializationLogger object can only dump a NodeTrees object, // creates a file in the constructor and closes the file in the destructor. +// Should only call LogNodeTrees and LogMetaInfo. class SerializationLogger : public BaseLogger { public: explicit SerializationLogger(const std::string& filename); @@ -30,12 +33,14 @@ class SerializationLogger : public BaseLogger { void LogHostTraceEventNode(const HostTraceEventNode&) override; void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) override; void LogNodeTrees(const NodeTrees&) override; + void LogMetaInfo(const std::unordered_map); private: void OpenFile(); void HandleTypeKernel(const DeviceTraceEventNode&); void HandleTypeMemset(const DeviceTraceEventNode&); void HandleTypeMemcpy(const DeviceTraceEventNode&); + std::string filename_; std::ofstream output_file_stream_; NodeTreesProto* node_trees_proto_; diff --git a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc index 2fe9626ec76df5654d19e785d043311f5f00496e..dee1019da2b52f93a2c2383f8f38c2c1662e63cb 100644 --- a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc @@ -17,6 +17,7 @@ #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/event_python.h" using paddle::platform::SerializationLogger; using paddle::platform::DeserializationReader; @@ -31,6 +32,7 @@ using paddle::platform::TracerEventType; using paddle::platform::KernelEventInfo; using paddle::platform::MemcpyEventInfo; using paddle::platform::MemsetEventInfo; +using paddle::platform::ProfilerResult; TEST(SerializationLoggerTest, dump_case0) { std::list host_events; @@ -149,7 +151,8 @@ TEST(SerializationLoggerTest, dump_case1) { TEST(DeserializationReaderTest, restore_case0) { DeserializationReader reader("test_serialization_logger_case0.pb"); - std::unique_ptr tree = reader.Parse(); + auto profiler_result = reader.Parse(); + auto& tree = profiler_result->GetNodeTrees(); std::map> nodes = tree->Traverse(true); EXPECT_EQ(nodes[10].size(), 4u); @@ -172,3 +175,26 @@ TEST(DeserializationReaderTest, restore_case0) { } } } + +TEST(DeserializationReaderTest, restore_case1) { + DeserializationReader reader("test_serialization_logger_case1.pb"); + auto profiler_result = reader.Parse(); + auto& tree = profiler_result->GetNodeTrees(); + std::map> nodes = + tree->Traverse(true); + EXPECT_EQ(nodes[10].size(), 1u); + EXPECT_EQ(nodes[11].size(), 1u); + std::vector thread1_nodes = nodes[10]; + std::vector thread2_nodes = nodes[11]; + for (auto it = thread1_nodes.begin(); it != thread1_nodes.end(); it++) { + if ((*it)->Name() == "root node") { + EXPECT_EQ((*it)->GetRuntimeTraceEventNodes().size(), 3u); + } + } + for (auto it = thread2_nodes.begin(); it != thread2_nodes.end(); it++) { + if ((*it)->Name() == "root node") { + EXPECT_EQ((*it)->GetChildren().size(), 0u); + EXPECT_EQ((*it)->GetRuntimeTraceEventNodes().size(), 2u); + } + } +} diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc new file mode 100644 index 0000000000000000000000000000000000000000..1a6f19d2f93aff104e2100b7591982a7a1d6400f --- /dev/null +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -0,0 +1,122 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/profiler/event_python.h" +#include "paddle/fluid/platform/profiler/chrometracing_logger.h" +#include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" +#include "paddle/fluid/platform/profiler/dump/serialization_logger.h" +#include "paddle/fluid/platform/profiler/extra_info.h" + +namespace paddle { +namespace platform { + +HostPythonNode::~HostPythonNode() { + // delete all runtime or device nodes and recursive delete children + for (auto it = children_node_ptrs.begin(); it != children_node_ptrs.end(); + ++it) { + delete *it; + } + for (auto it = runtime_node_ptrs.begin(); it != runtime_node_ptrs.end(); + ++it) { + delete *it; + } + for (auto it = device_node_ptrs.begin(); it != device_node_ptrs.end(); ++it) { + delete *it; + } +} + +HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) { + // Copy and transfer EventNode in NodeTree to PythonNode + if (root == nullptr) { + return nullptr; + } + // copy HostTraceEventNode and its children + HostPythonNode* host_python_node = new HostPythonNode(); + host_python_node->name = root->Name(); + host_python_node->type = root->Type(); + host_python_node->start_ns = root->StartNs(); + host_python_node->end_ns = root->EndNs(); + host_python_node->process_id = root->ProcessId(); + host_python_node->thread_id = root->ThreadId(); + for (auto it = root->GetChildren().begin(); it != root->GetChildren().end(); + ++it) { + host_python_node->children_node_ptrs.push_back(CopyTree(*it)); + } + // copy its CudaRuntimeTraceEventNode + for (auto runtimenode = root->GetRuntimeTraceEventNodes().begin(); + runtimenode != root->GetRuntimeTraceEventNodes().end(); ++runtimenode) { + HostPythonNode* runtime_python_node = new HostPythonNode(); + runtime_python_node->name = (*runtimenode)->Name(); + runtime_python_node->type = (*runtimenode)->Type(); + runtime_python_node->start_ns = (*runtimenode)->StartNs(); + runtime_python_node->end_ns = (*runtimenode)->EndNs(); + runtime_python_node->process_id = (*runtimenode)->ProcessId(); + runtime_python_node->thread_id = (*runtimenode)->ThreadId(); + host_python_node->runtime_node_ptrs.push_back(runtime_python_node); + // copy DeviceTraceEventNode + for (auto devicenode = (*runtimenode)->GetDeviceTraceEventNodes().begin(); + devicenode != (*runtimenode)->GetDeviceTraceEventNodes().end(); + ++devicenode) { + DevicePythonNode* device_python_node = new DevicePythonNode(); + device_python_node->name = (*devicenode)->Name(); + device_python_node->type = (*devicenode)->Type(); + device_python_node->start_ns = (*devicenode)->StartNs(); + device_python_node->end_ns = (*devicenode)->EndNs(); + device_python_node->device_id = (*devicenode)->DeviceId(); + device_python_node->context_id = (*devicenode)->ContextId(); + device_python_node->stream_id = (*devicenode)->StreamId(); + runtime_python_node->device_node_ptrs.push_back(device_python_node); + } + } + return host_python_node; +} + +ProfilerResult::ProfilerResult(std::unique_ptr tree, + const ExtraInfo& extra_info) + : tree_(std::move(tree)), extra_info_(extra_info) { + if (tree_ != nullptr) { + std::map nodetrees = tree_->GetNodeTrees(); + for (auto it = nodetrees.begin(); it != nodetrees.end(); ++it) { + thread_event_trees_map_[it->first] = CopyTree(it->second); + } + } +} + +ProfilerResult::~ProfilerResult() { + // delete all root nodes + for (auto it = thread_event_trees_map_.begin(); + it != thread_event_trees_map_.end(); ++it) { + delete it->second; + } +} + +void ProfilerResult::Save(const std::string& file_name, + const std::string format) { + if (format == std::string("json")) { + ChromeTracingLogger logger(file_name); + tree_->LogMe(&logger); + logger.LogMetaInfo(GetExtraInfo()); + } else if (format == std::string("pb")) { + SerializationLogger logger(file_name); + tree_->LogMe(&logger); + logger.LogMetaInfo(GetExtraInfo()); + } + return; +} + +std::unique_ptr LoadProfilerResult(std::string filename) { + DeserializationReader reader(filename); + std::unique_ptr result = reader.Parse(); + return result; +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler/event_python.h b/paddle/fluid/platform/profiler/event_python.h index b0d8eaa2427163a244224ec93e3a4e6b8d97dd25..12ecb9fde32aa0aedf9bf70e8e5bdf3d6c34b69f 100644 --- a/paddle/fluid/platform/profiler/event_python.h +++ b/paddle/fluid/platform/profiler/event_python.h @@ -15,8 +15,11 @@ limitations under the License. */ #pragma once #include +#include +#include #include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/extra_info.h" namespace paddle { namespace platform { @@ -66,18 +69,29 @@ struct HostPythonNode { class ProfilerResult { public: ProfilerResult() : tree_(nullptr) {} - explicit ProfilerResult(NodeTrees* tree); + explicit ProfilerResult(std::unique_ptr tree, + const ExtraInfo& extra_info); ~ProfilerResult(); std::map GetData() { - return thread_event_trees_map; + return thread_event_trees_map_; } - void Save(const std::string& file_name); + std::unordered_map GetExtraInfo() { + return extra_info_.GetExtraInfo(); + } + + void Save(const std::string& file_name, + const std::string format = std::string("json")); + + std::unique_ptr& GetNodeTrees() { return tree_; } private: - std::map thread_event_trees_map; - NodeTrees* tree_; - HostPythonNode* CopyTree(HostTraceEventNode* node); + std::map thread_event_trees_map_; + std::unique_ptr tree_; + ExtraInfo extra_info_; + HostPythonNode* CopyTree(HostTraceEventNode* root); }; +std::unique_ptr LoadProfilerResult(std::string filename); + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/profiler/profiler.cc b/paddle/fluid/platform/profiler/profiler.cc index 5784d6e671bbbc69a7762e5a0e757310fc5e7a3b..35dbc96874d3c8e09fec3ad9b440619b180647fe 100644 --- a/paddle/fluid/platform/profiler/profiler.cc +++ b/paddle/fluid/platform/profiler/profiler.cc @@ -25,8 +25,10 @@ #endif #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler/cuda_tracer.h" +#include "paddle/fluid/platform/profiler/extra_info.h" #include "paddle/fluid/platform/profiler/host_tracer.h" #include "paddle/fluid/platform/profiler/trace_event_collector.h" +#include "paddle/fluid/platform/profiler/utils.h" namespace paddle { namespace platform { @@ -44,10 +46,15 @@ std::unique_ptr Profiler::Create(const ProfilerOptions& options) { Profiler::Profiler(const ProfilerOptions& options) { options_ = options; - HostTracerOptions host_tracer_options; - host_tracer_options.trace_level = options.trace_level; - tracers_.emplace_back(new HostTracer(host_tracer_options), true); - tracers_.emplace_back(&CudaTracer::GetInstance(), false); + std::bitset<32> trace_switch(options_.trace_switch); + if (trace_switch.test(kProfileCPUOptionBit)) { + HostTracerOptions host_tracer_options; + host_tracer_options.trace_level = options_.trace_level; + tracers_.emplace_back(new HostTracer(host_tracer_options), true); + } + if (trace_switch.test(kProfileGPUOptionBit)) { + tracers_.emplace_back(&CudaTracer::GetInstance(), false); + } } Profiler::~Profiler() { alive_.store(false); } @@ -63,9 +70,10 @@ void Profiler::Start() { for (auto& tracer : tracers_) { tracer.Get().StartTracing(); } + cpu_utilization_.RecordBeginTimeInfo(); } -std::unique_ptr Profiler::Stop() { +std::unique_ptr Profiler::Stop() { SynchronizeAllDevice(); TraceEventCollector collector; for (auto& tracer : tracers_) { @@ -75,7 +83,22 @@ std::unique_ptr Profiler::Stop() { std::unique_ptr tree(new NodeTrees(collector.HostEvents(), collector.RuntimeEvents(), collector.DeviceEvents())); - return tree; + cpu_utilization_.RecordEndTimeInfo(); + ExtraInfo extrainfo; + extrainfo.AddExtraInfo(std::string("System Cpu Utilization"), + std::string("%f"), + cpu_utilization_.GetCpuUtilization()); + extrainfo.AddExtraInfo(std::string("Process Cpu Utilization"), + std::string("%f"), + cpu_utilization_.GetCpuCurProcessUtilization()); + const std::unordered_map thread_names = + collector.ThreadNames(); + for (const auto& kv : thread_names) { + extrainfo.AddExtraInfo(string_format(std::string("%llu"), kv.first), + kv.second); + } + return std::unique_ptr( + new platform::ProfilerResult(std::move(tree), extrainfo)); } } // namespace platform diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h index 4fc1c6daf96c7f30cbd549b23b1a8f23563bc590..f9a8ece050492805226cccce001251c3cd2ad0c2 100644 --- a/paddle/fluid/platform/profiler/profiler.h +++ b/paddle/fluid/platform/profiler/profiler.h @@ -15,12 +15,15 @@ #pragma once #include +#include #include #include #include #include #include "paddle/fluid/platform/macros.h" +#include "paddle/fluid/platform/profiler/cpu_utilization.h" #include "paddle/fluid/platform/profiler/event_node.h" +#include "paddle/fluid/platform/profiler/event_python.h" #include "paddle/fluid/platform/profiler/tracer_base.h" DECLARE_int64(host_trace_level); @@ -28,7 +31,11 @@ DECLARE_int64(host_trace_level); namespace paddle { namespace platform { +static constexpr uint32_t kProfileCPUOptionBit = 0; +static constexpr uint32_t kProfileGPUOptionBit = 1; + struct ProfilerOptions { + uint32_t trace_switch = 0; // bit 0: cpu, bit 1: gpu uint32_t trace_level = FLAGS_host_trace_level; }; @@ -40,7 +47,7 @@ class Profiler { void Start(); - std::unique_ptr Stop(); + std::unique_ptr Stop(); ~Profiler(); @@ -70,6 +77,7 @@ class Profiler { ProfilerOptions options_; uint64_t start_ns_ = UINT64_MAX; std::list tracers_; + CpuUtilization cpu_utilization_; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/profiler_test.cc b/paddle/fluid/platform/profiler/profiler_test.cc index 160c801dc6e3efa0a73ad132cc5509b03f7cffa8..32310b9e86228468a0b7e1e94f302a35bb834f8a 100644 --- a/paddle/fluid/platform/profiler/profiler_test.cc +++ b/paddle/fluid/platform/profiler/profiler_test.cc @@ -22,6 +22,7 @@ #ifdef PADDLE_WITH_HIP #include #endif +#include "paddle/fluid/platform/profiler/event_python.h" #include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/platform/profiler/profiler.h" @@ -30,8 +31,10 @@ TEST(ProfilerTest, TestHostTracer) { using paddle::platform::Profiler; using paddle::platform::RecordInstantEvent; using paddle::platform::TracerEventType; + using paddle::platform::ProfilerResult; ProfilerOptions options; options.trace_level = 2; + options.trace_switch = 3; auto profiler = Profiler::Create(options); EXPECT_TRUE(profiler); profiler->Prepare(); @@ -42,7 +45,8 @@ TEST(ProfilerTest, TestHostTracer) { RecordInstantEvent("TestTraceLevel_record2", TracerEventType::UserDefined, 3); } - auto nodetree = profiler->Stop(); + auto profiler_result = profiler->Stop(); + auto& nodetree = profiler_result->GetNodeTrees(); std::set host_events; for (const auto pair : nodetree->Traverse(true)) { for (const auto evt : pair.second) { @@ -56,8 +60,10 @@ TEST(ProfilerTest, TestHostTracer) { TEST(ProfilerTest, TestCudaTracer) { using paddle::platform::ProfilerOptions; using paddle::platform::Profiler; + using paddle::platform::ProfilerResult; ProfilerOptions options; options.trace_level = 0; + options.trace_switch = 3; auto profiler = Profiler::Create(options); EXPECT_TRUE(profiler); profiler->Prepare(); @@ -72,7 +78,8 @@ TEST(ProfilerTest, TestCudaTracer) { hipStreamCreate(&stream); hipStreamSynchronize(stream); #endif - auto nodetree = profiler->Stop(); + auto profiler_result = profiler->Stop(); + auto& nodetree = profiler_result->GetNodeTrees(); std::vector runtime_events; for (const auto pair : nodetree->Traverse(true)) { for (const auto host_node : pair.second) { diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h index 61f96218560ecf56502fc945bddf4486e7f624d3..16ef62fb51555182c07888819e5eee9be61b42c0 100644 --- a/paddle/fluid/platform/profiler/trace_event.h +++ b/paddle/fluid/platform/profiler/trace_event.h @@ -48,6 +48,8 @@ enum class TracerEventType { Communication = 12, // Used to mark python api PythonOp = 13, + // Used to mark python level userdefined + PythonUserDefined = 14, // A flag to denote the number of current types NumTypes };