未验证 提交 35694c2a 编写于 作者: C chenjian 提交者: GitHub

Add more fields to memory record (#43578)

上级 4c3969fa
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <cstdio> #include <cstdio>
#include <ctime> #include <ctime>
#include <limits> #include <limits>
#include <regex>
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h"
...@@ -125,22 +126,26 @@ void ChromeTracingLogger::LogMemTraceEventNode( ...@@ -125,22 +126,26 @@ void ChromeTracingLogger::LogMemTraceEventNode(
std::string( std::string(
R"JSON( R"JSON(
{ {
"name": "[memory]", "pid": %lld, "tid": "%lld", "name": "[memory]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld, "ts": %lld,
"ph": "i", "cat": "%s", "ph": "i", "cat": "%s",
"args": { "args": {
"place": "%s", "place": "%s",
"addr": "%llu", "addr": "%llu",
"increase_bytes": %lld,
"current_allocated": %llu, "current_allocated": %llu,
"current_reserved": %llu, "current_reserved": %llu,
"increase_bytes": %lld "peak_allocated": %llu,
"peak_reserved": %llu
} }
}, },
)JSON"), )JSON"),
mem_node.ProcessId(), mem_node.ThreadId(), mem_node.TimeStampNs(), mem_node.ProcessId(), mem_node.ThreadId(), nsToUs(mem_node.TimeStampNs()),
StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(), StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(),
mem_node.Addr(), mem_node.CurrentAllocated(), mem_node.CurrentReserved(), mem_node.Addr(), mem_node.IncreaseBytes(), mem_node.CurrentAllocated(),
mem_node.IncreaseBytes()); mem_node.CurrentReserved(), mem_node.PeakAllocated(),
mem_node.PeakReserved());
pid_tid_set_.insert({mem_node.ProcessId(), mem_node.ThreadId()});
} }
void ChromeTracingLogger::LogHostTraceEventNode( void ChromeTracingLogger::LogHostTraceEventNode(
...@@ -164,6 +169,8 @@ void ChromeTracingLogger::LogHostTraceEventNode( ...@@ -164,6 +169,8 @@ void ChromeTracingLogger::LogHostTraceEventNode(
input_shapes = op_supplement_node->InputShapes(); input_shapes = op_supplement_node->InputShapes();
input_dtypes = op_supplement_node->Dtypes(); input_dtypes = op_supplement_node->Dtypes();
callstack = op_supplement_node->CallStack(); callstack = op_supplement_node->CallStack();
callstack = std::regex_replace(callstack, std::regex("\""), "\'");
callstack = std::regex_replace(callstack, std::regex("\n"), "\\n");
} }
switch (host_node.Type()) { switch (host_node.Type()) {
case TracerEventType::ProfileStep: case TracerEventType::ProfileStep:
......
...@@ -209,6 +209,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode( ...@@ -209,6 +209,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode(
mem_event.place = mem_event_proto.place(); mem_event.place = mem_event_proto.place();
mem_event.current_allocated = mem_event_proto.current_allocated(); mem_event.current_allocated = mem_event_proto.current_allocated();
mem_event.current_reserved = mem_event_proto.current_reserved(); mem_event.current_reserved = mem_event_proto.current_reserved();
mem_event.peak_allocated = mem_event_proto.peak_allocated();
mem_event.peak_reserved = mem_event_proto.peak_reserved();
return new MemTraceEventNode(mem_event); return new MemTraceEventNode(mem_event);
} }
......
...@@ -51,10 +51,14 @@ enum TracerEventTypeProto { ...@@ -51,10 +51,14 @@ enum TracerEventTypeProto {
}; };
enum TracerMemEventTypeProto { enum TracerMemEventTypeProto {
// Used to mark memory allocation // Used to mark memory allocation which is managed by paddle
Allocate = 0; Allocate = 0;
// Used to mark memory free // Used to mark memory free which is managed by paddle
Free = 1; Free = 1;
// Used to mark reserved memory allocation which is applied from device.
ReservedAllocate = 2;
// Used to mark reserved memory free which is released to device.
ReservedFree = 3;
}; };
message KernelEventInfoProto { message KernelEventInfoProto {
...@@ -150,6 +154,10 @@ message MemTraceEventProto { ...@@ -150,6 +154,10 @@ message MemTraceEventProto {
required uint64 current_allocated = 8; required uint64 current_allocated = 8;
// current total reserved memory // current total reserved memory
required uint64 current_reserved = 9; required uint64 current_reserved = 9;
// current peak allocated memory
required uint64 peak_allocated = 10;
// current peak reserved memory
required uint64 peak_reserved = 11;
} }
message OperatorSupplementEventProto { message OperatorSupplementEventProto {
......
...@@ -130,6 +130,8 @@ void SerializationLogger::LogMemTraceEventNode( ...@@ -130,6 +130,8 @@ void SerializationLogger::LogMemTraceEventNode(
mem_trace_event->set_place(mem_node.Place()); mem_trace_event->set_place(mem_node.Place());
mem_trace_event->set_current_allocated(mem_node.CurrentAllocated()); mem_trace_event->set_current_allocated(mem_node.CurrentAllocated());
mem_trace_event->set_current_reserved(mem_node.CurrentReserved()); mem_trace_event->set_current_reserved(mem_node.CurrentReserved());
mem_trace_event->set_peak_allocated(mem_node.PeakAllocated());
mem_trace_event->set_peak_reserved(mem_node.PeakReserved());
current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event); current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event);
} }
......
...@@ -53,9 +53,9 @@ TEST(SerializationLoggerTest, dump_case0) { ...@@ -53,9 +53,9 @@ TEST(SerializationLoggerTest, dump_case0) {
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000, mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50, TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50)); "GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50)); 10, 10, -50, "GPU:0", 0, 50, 100, 100));
std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes; std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
std::map<std::string, std::vector<std::string>> dtypes; std::map<std::string, std::vector<std::string>> dtypes;
input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3}); input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
......
...@@ -47,6 +47,8 @@ class MemTraceEventNode { ...@@ -47,6 +47,8 @@ class MemTraceEventNode {
std::string Place() const { return mem_event_.place; } std::string Place() const { return mem_event_.place; }
uint64_t CurrentAllocated() const { return mem_event_.current_allocated; } uint64_t CurrentAllocated() const { return mem_event_.current_allocated; }
uint64_t CurrentReserved() const { return mem_event_.current_reserved; } uint64_t CurrentReserved() const { return mem_event_.current_reserved; }
uint64_t PeakAllocated() const { return mem_event_.peak_allocated; }
uint64_t PeakReserved() const { return mem_event_.peak_reserved; }
// member function // member function
void LogMe(BaseLogger* logger) { logger->LogMemTraceEventNode(*this); } void LogMe(BaseLogger* logger) { logger->LogMemTraceEventNode(*this); }
......
...@@ -93,6 +93,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) { ...@@ -93,6 +93,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) {
mem_python_node->place = (*memnode)->Place(); mem_python_node->place = (*memnode)->Place();
mem_python_node->current_allocated = (*memnode)->CurrentAllocated(); mem_python_node->current_allocated = (*memnode)->CurrentAllocated();
mem_python_node->current_reserved = (*memnode)->CurrentReserved(); mem_python_node->current_reserved = (*memnode)->CurrentReserved();
mem_python_node->peak_allocated = (*memnode)->PeakAllocated();
mem_python_node->peak_reserved = (*memnode)->PeakReserved();
host_python_node->mem_node_ptrs.push_back(mem_python_node); host_python_node->mem_node_ptrs.push_back(mem_python_node);
} }
// copy OperatorSupplementEventNode's information if exists // copy OperatorSupplementEventNode's information if exists
......
...@@ -66,6 +66,10 @@ struct MemPythonNode { ...@@ -66,6 +66,10 @@ struct MemPythonNode {
uint64_t current_allocated; uint64_t current_allocated;
// current total reserved memory // current total reserved memory
uint64_t current_reserved; uint64_t current_reserved;
// peak allocated memory
uint64_t peak_allocated;
// peak reserved memory
uint64_t peak_reserved;
}; };
struct HostPythonNode { struct HostPythonNode {
......
...@@ -50,9 +50,9 @@ TEST(NodeTreesTest, LogMe_case0) { ...@@ -50,9 +50,9 @@ TEST(NodeTreesTest, LogMe_case0) {
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11)); std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000, mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50, TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50)); "GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50)); 10, 10, -50, "GPU:0", 0, 50, 100, 100));
std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes; std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
std::map<std::string, std::vector<std::string>> dtypes; std::map<std::string, std::vector<std::string>> dtypes;
input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3}); input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
...@@ -185,9 +185,9 @@ TEST(NodeTreesTest, HandleTrees_case0) { ...@@ -185,9 +185,9 @@ TEST(NodeTreesTest, HandleTrees_case0) {
std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11)); std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000, mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50, TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50)); "GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free, mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50)); 10, 10, -50, "GPU:0", 0, 50, 100, 100));
op_supplement_events.push_back(OperatorSupplementEvent( op_supplement_events.push_back(OperatorSupplementEvent(
11600, "op1", std::map<std::string, std::vector<std::vector<int64_t>>>(), 11600, "op1", std::map<std::string, std::vector<std::vector<int64_t>>>(),
std::map<std::string, std::vector<std::string>>(), "op1()", 10, 10)); std::map<std::string, std::vector<std::string>>(), "op1()", 10, 10));
......
...@@ -59,10 +59,14 @@ enum class TracerEventType { ...@@ -59,10 +59,14 @@ enum class TracerEventType {
}; };
enum class TracerMemEventType { enum class TracerMemEventType {
// Used to mark memory allocation // Used to mark memory allocation which is managed by paddle
Allocate = 0, Allocate = 0,
// Used to mark memory free // Used to mark memory free which is managed by paddle
Free = 1, Free = 1,
// Used to mark reserved memory allocation which is applied from device.
ReservedAllocate = 2,
// Used to mark reserved memory free which is released to device.
ReservedFree = 3,
// A flag to denote the number of current types // A flag to denote the number of current types
NumTypes NumTypes
}; };
...@@ -288,7 +292,8 @@ struct MemTraceEvent { ...@@ -288,7 +292,8 @@ struct MemTraceEvent {
MemTraceEvent(uint64_t timestamp_ns, uint64_t addr, TracerMemEventType type, MemTraceEvent(uint64_t timestamp_ns, uint64_t addr, TracerMemEventType type,
uint64_t process_id, uint64_t thread_id, int64_t increase_bytes, uint64_t process_id, uint64_t thread_id, int64_t increase_bytes,
const std::string& place, uint64_t current_allocated, const std::string& place, uint64_t current_allocated,
uint64_t current_reserved) uint64_t current_reserved, uint64_t peak_allocated,
uint64_t peak_reserved)
: timestamp_ns(timestamp_ns), : timestamp_ns(timestamp_ns),
addr(addr), addr(addr),
type(type), type(type),
...@@ -297,7 +302,9 @@ struct MemTraceEvent { ...@@ -297,7 +302,9 @@ struct MemTraceEvent {
increase_bytes(increase_bytes), increase_bytes(increase_bytes),
place(place), place(place),
current_allocated(current_allocated), current_allocated(current_allocated),
current_reserved(current_reserved) {} current_reserved(current_reserved),
peak_allocated(peak_allocated),
peak_reserved(peak_reserved) {}
// timestamp of the record // timestamp of the record
uint64_t timestamp_ns; uint64_t timestamp_ns;
...@@ -318,6 +325,10 @@ struct MemTraceEvent { ...@@ -318,6 +325,10 @@ struct MemTraceEvent {
uint64_t current_allocated; uint64_t current_allocated;
// current total reserved memory // current total reserved memory
uint64_t current_reserved; uint64_t current_reserved;
// current peak allocated memory
uint64_t peak_allocated;
// current peak reserved memory
uint64_t peak_reserved;
}; };
} // namespace platform } // namespace platform
......
...@@ -83,7 +83,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, uint16_t RegistersPerThread, ...@@ -83,7 +83,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, uint16_t RegistersPerThread,
#endif #endif
// Returns the human-readable category name for a memory trace event type.
//
// The name table is indexed by the enumerator's integer value, so its order
// must follow the declaration order of TracerMemEventType. Values that have
// no entry (the NumTypes sentinel, or an out-of-range cast) yield "Unknown"
// instead of reading past the end of the table.
const char* StringTracerMemEventType(TracerMemEventType type) {
  static const char* const kCategoryNames[] = {"Allocate", "Free",
                                               "ReservedAllocate",
                                               "ReservedFree"};
  constexpr int kNumNames =
      static_cast<int>(sizeof(kCategoryNames) / sizeof(kCategoryNames[0]));
  // Fail the build if an enumerator is added without a matching name.
  static_assert(kNumNames == static_cast<int>(TracerMemEventType::NumTypes),
                "kCategoryNames must have one entry per TracerMemEventType");
  const int index = static_cast<int>(type);
  if (index < 0 || index >= kNumNames) {
    return "Unknown";
  }
  return kCategoryNames[index];
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册