未验证 提交 35694c2a 编写于 作者: C chenjian 提交者: GitHub

Add more fields to memory record (#43578)

上级 4c3969fa
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <cstdio>
#include <ctime>
#include <limits>
#include <regex>
#include "glog/logging.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
......@@ -125,22 +126,26 @@ void ChromeTracingLogger::LogMemTraceEventNode(
std::string(
R"JSON(
{
"name": "[memory]", "pid": %lld, "tid": "%lld",
"name": "[memory]", "pid": %lld, "tid": "%lld(C++)",
"ts": %lld,
"ph": "i", "cat": "%s",
"args": {
"place": "%s",
"addr": "%llu",
"increase_bytes": %lld,
"current_allocated": %llu,
"current_reserved": %llu,
"increase_bytes": %lld
"peak_allocated": %llu,
"peak_reserved": %llu
}
},
)JSON"),
mem_node.ProcessId(), mem_node.ThreadId(), mem_node.TimeStampNs(),
mem_node.ProcessId(), mem_node.ThreadId(), nsToUs(mem_node.TimeStampNs()),
StringTracerMemEventType(mem_node.Type()), mem_node.Place().c_str(),
mem_node.Addr(), mem_node.CurrentAllocated(), mem_node.CurrentReserved(),
mem_node.IncreaseBytes());
mem_node.Addr(), mem_node.IncreaseBytes(), mem_node.CurrentAllocated(),
mem_node.CurrentReserved(), mem_node.PeakAllocated(),
mem_node.PeakReserved());
pid_tid_set_.insert({mem_node.ProcessId(), mem_node.ThreadId()});
}
void ChromeTracingLogger::LogHostTraceEventNode(
......@@ -164,6 +169,8 @@ void ChromeTracingLogger::LogHostTraceEventNode(
input_shapes = op_supplement_node->InputShapes();
input_dtypes = op_supplement_node->Dtypes();
callstack = op_supplement_node->CallStack();
callstack = std::regex_replace(callstack, std::regex("\""), "\'");
callstack = std::regex_replace(callstack, std::regex("\n"), "\\n");
}
switch (host_node.Type()) {
case TracerEventType::ProfileStep:
......
......@@ -209,6 +209,8 @@ MemTraceEventNode* DeserializationReader::RestoreMemTraceEventNode(
mem_event.place = mem_event_proto.place();
mem_event.current_allocated = mem_event_proto.current_allocated();
mem_event.current_reserved = mem_event_proto.current_reserved();
mem_event.peak_allocated = mem_event_proto.peak_allocated();
mem_event.peak_reserved = mem_event_proto.peak_reserved();
return new MemTraceEventNode(mem_event);
}
......
......@@ -51,10 +51,14 @@ enum TracerEventTypeProto {
};
// Protobuf enumeration of the kinds of memory trace events.
enum TracerMemEventTypeProto {
// Marks an allocation of memory that is managed by paddle.
Allocate = 0;
// Marks a free of memory that is managed by paddle.
Free = 1;
// Marks an allocation of reserved memory, i.e. memory applied for from the
// device.
ReservedAllocate = 2;
// Marks a free of reserved memory, i.e. memory released back to the device.
ReservedFree = 3;
};
message KernelEventInfoProto {
......@@ -150,6 +154,10 @@ message MemTraceEventProto {
required uint64 current_allocated = 8;
// current total reserved memory
required uint64 current_reserved = 9;
// current peak allocated memory
required uint64 peak_allocated = 10;
// current peak reserved memory
required uint64 peak_reserved = 11;
}
message OperatorSupplementEventProto {
......
......@@ -130,6 +130,8 @@ void SerializationLogger::LogMemTraceEventNode(
mem_trace_event->set_place(mem_node.Place());
mem_trace_event->set_current_allocated(mem_node.CurrentAllocated());
mem_trace_event->set_current_reserved(mem_node.CurrentReserved());
mem_trace_event->set_peak_allocated(mem_node.PeakAllocated());
mem_trace_event->set_peak_reserved(mem_node.PeakReserved());
current_mem_trace_event_node_proto_->set_allocated_mem_event(mem_trace_event);
}
......
......@@ -53,9 +53,9 @@ TEST(SerializationLoggerTest, dump_case0) {
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50));
"GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50));
10, 10, -50, "GPU:0", 0, 50, 100, 100));
std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
std::map<std::string, std::vector<std::string>> dtypes;
input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
......
......@@ -47,6 +47,8 @@ class MemTraceEventNode {
// Returns, by value, the place string stored in the wrapped memory event.
std::string Place() const {
  return mem_event_.place;
}
// Returns the current_allocated value stored in the wrapped memory event.
uint64_t CurrentAllocated() const {
  return mem_event_.current_allocated;
}
// Returns the current_reserved value stored in the wrapped memory event.
uint64_t CurrentReserved() const {
  return mem_event_.current_reserved;
}
// Returns the peak_allocated value stored in the wrapped memory event.
uint64_t PeakAllocated() const {
  return mem_event_.peak_allocated;
}
// Returns the peak_reserved value stored in the wrapped memory event.
uint64_t PeakReserved() const {
  return mem_event_.peak_reserved;
}
// member function
// Hands this node to the given logger by calling its LogMemTraceEventNode
// method with a reference to this object.
void LogMe(BaseLogger* logger) {
  logger->LogMemTraceEventNode(*this);
}
......
......@@ -93,6 +93,8 @@ HostPythonNode* ProfilerResult::CopyTree(HostTraceEventNode* root) {
mem_python_node->place = (*memnode)->Place();
mem_python_node->current_allocated = (*memnode)->CurrentAllocated();
mem_python_node->current_reserved = (*memnode)->CurrentReserved();
mem_python_node->peak_allocated = (*memnode)->PeakAllocated();
mem_python_node->peak_reserved = (*memnode)->PeakReserved();
host_python_node->mem_node_ptrs.push_back(mem_python_node);
}
// copy OperatorSupplementEventNode's information if exists
......
......@@ -66,6 +66,10 @@ struct MemPythonNode {
uint64_t current_allocated;
// current total reserved memory
uint64_t current_reserved;
// peak allocated memory
uint64_t peak_allocated;
// peak reserved memory
uint64_t peak_reserved;
};
struct HostPythonNode {
......
......@@ -50,9 +50,9 @@ TEST(NodeTreesTest, LogMe_case0) {
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50));
"GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50));
10, 10, -50, "GPU:0", 0, 50, 100, 100));
std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
std::map<std::string, std::vector<std::string>> dtypes;
input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
......@@ -185,9 +185,9 @@ TEST(NodeTreesTest, HandleTrees_case0) {
std::string("op3"), TracerEventType::Operator, 2000, 120000, 10, 11));
mem_events.push_back(MemTraceEvent(11500, 0x1000,
TracerMemEventType::Allocate, 10, 10, 50,
"GPU:0", 50, 50));
"GPU:0", 50, 50, 100, 100));
mem_events.push_back(MemTraceEvent(11900, 0x1000, TracerMemEventType::Free,
10, 10, -50, "GPU:0", 0, 50));
10, 10, -50, "GPU:0", 0, 50, 100, 100));
op_supplement_events.push_back(OperatorSupplementEvent(
11600, "op1", std::map<std::string, std::vector<std::vector<int64_t>>>(),
std::map<std::string, std::vector<std::string>>(), "op1()", 10, 10));
......
......@@ -59,10 +59,14 @@ enum class TracerEventType {
};
// Kinds of memory trace events.
enum class TracerMemEventType {
// Marks an allocation of memory that is managed by paddle.
Allocate = 0,
// Marks a free of memory that is managed by paddle.
Free = 1,
// Marks an allocation of reserved memory, i.e. memory applied for from the
// device.
ReservedAllocate = 2,
// Marks a free of reserved memory, i.e. memory released back to the device.
ReservedFree = 3,
// Sentinel, not an event kind: because it follows the last real enumerator,
// its value equals the number of event types declared above. Keep it last.
NumTypes
};
......@@ -288,7 +292,8 @@ struct MemTraceEvent {
MemTraceEvent(uint64_t timestamp_ns, uint64_t addr, TracerMemEventType type,
uint64_t process_id, uint64_t thread_id, int64_t increase_bytes,
const std::string& place, uint64_t current_allocated,
uint64_t current_reserved)
uint64_t current_reserved, uint64_t peak_allocated,
uint64_t peak_reserved)
: timestamp_ns(timestamp_ns),
addr(addr),
type(type),
......@@ -297,7 +302,9 @@ struct MemTraceEvent {
increase_bytes(increase_bytes),
place(place),
current_allocated(current_allocated),
current_reserved(current_reserved) {}
current_reserved(current_reserved),
peak_allocated(peak_allocated),
peak_reserved(peak_reserved) {}
// timestamp of the record
uint64_t timestamp_ns;
......@@ -318,6 +325,10 @@ struct MemTraceEvent {
uint64_t current_allocated;
// current total reserved memory
uint64_t current_reserved;
// current peak allocated memory
uint64_t peak_allocated;
// current peak reserved memory
uint64_t peak_reserved;
};
} // namespace platform
......
......@@ -83,7 +83,8 @@ float CalculateEstOccupancy(uint32_t DeviceId, uint16_t RegistersPerThread,
#endif
// Maps a memory trace event type to its printable category name.
//
// The lookup table is indexed by the enumerator's integer value, so its
// entries must stay in the same order as the TracerMemEventType enumerators
// (Allocate, Free, ReservedAllocate, ReservedFree).
//
// Returns a pointer to a static string literal. A value with no table entry
// (such as the NumTypes sentinel) yields "Unknown" rather than indexing past
// the end of the table.
const char* StringTracerMemEventType(TracerMemEventType type) {
  // Single definition of the table; the earlier two-entry version is dropped
  // because declaring the same static twice in one scope is a redefinition.
  static const char* const kCategoryNames[] = {
      "Allocate", "Free", "ReservedAllocate", "ReservedFree"};
  constexpr int kNumNames =
      static_cast<int>(sizeof(kCategoryNames) / sizeof(kCategoryNames[0]));

  const int index = static_cast<int>(type);
  if (index < 0 || index >= kNumNames) {
    return "Unknown";
  }
  return kCategoryNames[index];
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册