diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc index 72d343692df730c44cf52e652f5ca39a2f9893ff..1a9ff2e6694eaac86bb91c49d949a578dcfc73d0 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.cc +++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include "glog/logging.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -125,22 +126,26 @@ void ChromeTracingLogger::LogMemTraceEventNode( std::string( R"JSON( { - "name": "[memory]", "pid": %lld, "tid": "%lld", + "name": "[memory]", "pid": %lld, "tid": "%lld(C++)", "ts": %lld, "ph": "i", "cat": "%s", "args": { "place": "%s", "addr": "%llu", + "increase_bytes": %lld, "current_allocated": %llu, "current_reserved": %llu, - "increase_bytes": %lld + "peak_allocated": %llu, + "peak_reserved": %llu } }, )JSON"), - mem_node.ProcessId(), mem_node.ThreadId(), mem_node.TimeStampNs(), + mem_node.ProcessId(), mem_node.ThreadId(), nsToUs(mem_node.TimeStampNs()), StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(), - mem_node.Addr(), mem_node.CurrentAllocated(), mem_node.CurrentReserved(), - mem_node.IncreaseBytes()); + mem_node.Addr(), mem_node.IncreaseBytes(), mem_node.CurrentAllocated(), + mem_node.CurrentReserved(), mem_node.PeakAllocated(), + mem_node.PeakReserved()); + pid_tid_set_.insert({mem_node.ProcessId(), mem_node.ThreadId()}); } void ChromeTracingLogger::LogHostTraceEventNode( @@ -164,6 +169,8 @@ void ChromeTracingLogger::LogHostTraceEventNode( input_shapes = op_supplement_node->InputShapes(); input_dtypes = op_supplement_node->Dtypes(); callstack = op_supplement_node->CallStack(); + callstack = std::regex_replace(callstack, std::regex("\""), "\'"); + callstack = std::regex_replace(callstack, std::regex("\n"), "\\n"); } switch (host_node.Type()) { case TracerEventType::ProfileStep: diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc index 65f5e81238bc85b00ab068f536ecc216a8a2438e..e5de858e15c76f4876f250e778d4dad9b27f2fd6 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc +++ b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc @@ -209,6 +209,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode( mem_event.place = mem_event_proto.place(); mem_event.current_allocated = mem_event_proto.current_allocated(); mem_event.current_reserved = mem_event_proto.current_reserved(); + mem_event.peak_allocated = mem_event_proto.peak_allocated(); + mem_event.peak_reserved = mem_event_proto.peak_reserved(); return new MemTraceEventNode(mem_event); } diff --git a/paddle/fluid/platform/profiler/dump/nodetree.proto b/paddle/fluid/platform/profiler/dump/nodetree.proto index 0f0c9c92c9c937e9a4198d81dcf3a4772b99cdea..4ebfb6e73b331ccb340db842c87c4a1f93d5c4f7 100644 --- a/paddle/fluid/platform/profiler/dump/nodetree.proto +++ b/paddle/fluid/platform/profiler/dump/nodetree.proto @@ -51,10 +51,14 @@ enum TracerEventTypeProto { }; enum TracerMemEventTypeProto { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0; - // Used to mark memory free + // Used to mark memory free which is managed by paddle Free = 1; + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2; + // Used to mark reserved memory free which is released to device. + ReservedFree = 3; }; message KernelEventInfoProto { @@ -150,6 +154,10 @@ message MemTraceEventProto { required uint64 current_allocated = 8; // current total reserved memory required uint64 current_reserved = 9; + // current peak allocated memory + required uint64 peak_allocated = 10; + // current peak reserved memory + required uint64 peak_reserved = 11; } message OperatorSupplementEventProto { diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.cc b/paddle/fluid/platform/profiler/dump/serialization_logger.cc index eaf1353168ea4c729be8c78fc81f9e1ba1f32745..7b1c5bdaa41bc291a681624abdcc7da18907cb7a 100644 --- a/paddle/fluid/platform/profiler/dump/serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.cc @@ -130,6 +130,8 @@ void SerializationLogger::LogMemTraceEventNode( mem_trace_event->set_place(mem_node.Place()); mem_trace_event->set_current_allocated(mem_node.CurrentAllocated()); mem_trace_event->set_current_reserved(mem_node.CurrentReserved()); + mem_trace_event->set_peak_allocated(mem_node.PeakAllocated()); + mem_trace_event->set_peak_reserved(mem_node.PeakReserved()); current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event); } diff --git a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc index dc6a6bf32d6e30b3e3a9a9855b92079bcee4c0d9..0a3bda1c345187975469b84824c96d9b0b8b183a 100644 --- a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc @@ -53,9 +53,9 @@ TEST(SerializationLoggerTest, dump_case0) { std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); diff --git a/paddle/fluid/platform/profiler/event_node.h b/paddle/fluid/platform/profiler/event_node.h index acd5a03109f729db994e5bcda304de5e3a560d58..3ffa9241e9bfb718c7f1a7be2205f8e2c08bd29a 100644 --- a/paddle/fluid/platform/profiler/event_node.h +++ b/paddle/fluid/platform/profiler/event_node.h @@ -47,6 +47,8 @@ class MemTraceEventNode { std::string Place() const { return mem_event_.place; } uint64_t CurrentAllocated() const { return mem_event_.current_allocated; } uint64_t CurrentReserved() const { return mem_event_.current_reserved; } + uint64_t PeakAllocated() const { return mem_event_.peak_allocated; } + uint64_t PeakReserved() const { return mem_event_.peak_reserved; } // member function void LogMe(BaseLogger* logger) { logger->LogMemTraceEventNode(*this); } diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc index 4e40e87bbbf208817982e6b2726684169c565431..162bf5da642b43070c3b6870fd54246a1455cce4 100644 --- a/paddle/fluid/platform/profiler/event_python.cc +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -93,6 +93,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) { mem_python_node->place = (*memnode)->Place(); mem_python_node->current_allocated = (*memnode)->CurrentAllocated(); mem_python_node->current_reserved = (*memnode)->CurrentReserved(); + mem_python_node->peak_allocated = (*memnode)->PeakAllocated(); + mem_python_node->peak_reserved = (*memnode)->PeakReserved(); host_python_node->mem_node_ptrs.push_back(mem_python_node); } // copy OperatorSupplementEventNode's information if exists diff --git a/paddle/fluid/platform/profiler/event_python.h b/paddle/fluid/platform/profiler/event_python.h index 4d1f5ad4f788ea62f207e22602efef015478f34b..9c5ac28f36f5b25c9889599e93718f1fabc72de6 100644 --- a/paddle/fluid/platform/profiler/event_python.h +++ b/paddle/fluid/platform/profiler/event_python.h @@ -66,6 +66,10 @@ struct MemPythonNode { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // peak allocated memory + uint64_t peak_allocated; + // peak reserved memory + uint64_t peak_reserved; }; struct HostPythonNode { diff --git a/paddle/fluid/platform/profiler/test_event_node.cc b/paddle/fluid/platform/profiler/test_event_node.cc index b70034633ae661f9328febd15670af31c5a2ffc4..3f825ce63cd83ee9d1c4f60b9788aab88ecad213 100644 --- a/paddle/fluid/platform/profiler/test_event_node.cc +++ b/paddle/fluid/platform/profiler/test_event_node.cc @@ -50,9 +50,9 @@ TEST(NodeTreesTest, LogMe_case0) { std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); std::map>> input_shapes; std::map> dtypes; input_shapes[std::string("X")].push_back(std::vector{1, 2, 3}); @@ -185,9 +185,9 @@ TEST(NodeTreesTest, HandleTrees_case0) { std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11)); mem_events.push_back(MemTraceEvent(11500, 0x1000, TracerMemEventType::Allocate, 10, 10, 50, - "GPU:0", 50, 50)); + "GPU:0", 50, 50, 100, 100)); mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, - 10, 10, -50, "GPU:0", 0, 50)); + 10, 10, -50, "GPU:0", 0, 50, 100, 100)); op_supplement_events.push_back(OperatorSupplementEvent( 11600, "op1", std::map>>(), std::map>(), "op1()", 10, 10)); diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h index bfa000e2683de844152331c35cebfe9aa8c31a1b..b2504a5ec458d773aaec55e78c20afe4dc83c2a1 100644 --- a/paddle/fluid/platform/profiler/trace_event.h +++ b/paddle/fluid/platform/profiler/trace_event.h @@ -59,10 +59,14 @@ enum class TracerEventType { }; enum class TracerMemEventType { - // Used to mark memory allocation + // Used to mark memory allocation which is managed by paddle Allocate = 0, - // Used to mark memory free + // Used to mark memory free which is managed by paddle Free = 1, + // Used to mark reserved memory allocation which is applied from device. + ReservedAllocate = 2, + // Used to mark reserved memory free which is released to device. + ReservedFree = 3, // A flag to denote the number of current types NumTypes }; @@ -288,7 +292,8 @@ struct MemTraceEvent { MemTraceEvent(uint64_t timestamp_ns, uint64_t addr, TracerMemEventType type, uint64_t process_id, uint64_t thread_id, int64_t increase_bytes, const std::string& place, uint64_t current_allocated, - uint64_t current_reserved) + uint64_t current_reserved, uint64_t peak_allocated, + uint64_t peak_reserved) : timestamp_ns(timestamp_ns), addr(addr), type(type), @@ -297,7 +302,9 @@ struct MemTraceEvent { increase_bytes(increase_bytes), place(place), current_allocated(current_allocated), - current_reserved(current_reserved) {} + current_reserved(current_reserved), + peak_allocated(peak_allocated), + peak_reserved(peak_reserved) {} // timestamp of the record uint64_t timestamp_ns; @@ -318,6 +325,10 @@ struct MemTraceEvent { uint64_t current_allocated; // current total reserved memory uint64_t current_reserved; + // current peak allocated memory + uint64_t peak_allocated; + // current peak reserved memory + uint64_t peak_reserved; }; } // namespace platform diff --git a/paddle/fluid/platform/profiler/utils.cc b/paddle/fluid/platform/profiler/utils.cc index 1f8e113fdd9146b50c2504ade2efb8d46de273e7..446fa49eefbd19d36895c51ec19d04d4a2beda0f 100644 --- a/paddle/fluid/platform/profiler/utils.cc +++ b/paddle/fluid/platform/profiler/utils.cc @@ -83,7 +83,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, uint16_t RegistersPerThread, #endif const char* StringTracerMemEventType(TracerMemEventType type) { - static const char* categary_name_[] = {"Allocate", "Free"}; + static const char* categary_name_[] = {"Allocate", "Free", "ReservedAllocate", + "ReservedFree"}; return categary_name_[static_cast(type)]; }