未验证 提交 57b2033b 编写于 作者: C chenjian 提交者: GitHub

add trace event data structure definition (#39109)

* add trace event data structure definition

* convert enum item to string for cupti enum explanation

* modify paddle_enforce_eq description
上级 80753755
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <list>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/output_logger.h"
#include "paddle/fluid/platform/profiler/trace_event.h"
namespace paddle {
namespace platform {
// Node that wraps one DeviceTraceEvent and exposes its fields through
// read-only accessors. The node stores its own copy of the event.
class DeviceTraceEventNode {
 public:
  // Copies device_event into the node.
  explicit DeviceTraceEventNode(const DeviceTraceEvent& device_event)
      : device_event_(device_event) {}
  ~DeviceTraceEventNode() {}

  // Plain accessors; each one returns the matching field of the wrapped
  // event by value.
  std::string name() const { return device_event_.name; }
  TracerEventType type() const { return device_event_.type; }
  uint64_t start_ns() const { return device_event_.start_ns; }
  uint64_t end_ns() const { return device_event_.end_ns; }
  uint64_t device_id() const { return device_event_.device_id; }
  uint64_t context_id() const { return device_event_.context_id; }
  uint64_t stream_id() const { return device_event_.stream_id; }
  // end_ns - start_ns of the wrapped event (unsigned arithmetic).
  uint64_t duration() const {
    return device_event_.end_ns - device_event_.start_ns;
  }
  uint32_t correlation_id() const { return device_event_.correlation_id; }

  // Returns the kernel details of the event. Guarded by PADDLE_ENFORCE_EQ:
  // the event type is required to be TracerEventType::Kernel.
  KernelEventInfo kernel_info() const {
    PADDLE_ENFORCE_EQ(
        device_event_.type, TracerEventType::Kernel,
        platform::errors::Unavailable(
            "Can not get kernel_info, "
            "TracerEventType in node must be TracerEventType::Kernel."));
    return device_event_.kernel_info;
  }
  // Returns the memcpy details of the event. Guarded by PADDLE_ENFORCE_EQ:
  // the event type is required to be TracerEventType::Memcpy.
  MemcpyEventInfo memcpy_info() const {
    PADDLE_ENFORCE_EQ(
        device_event_.type, TracerEventType::Memcpy,
        platform::errors::Unavailable(
            "Can not get memcpy_info, "
            "TracerEventType in node must be TracerEventType::Memcpy."));
    return device_event_.memcpy_info;
  }
  // Returns the memset details of the event. Guarded by PADDLE_ENFORCE_EQ:
  // the event type is required to be TracerEventType::Memset.
  MemsetEventInfo memset_info() const {
    PADDLE_ENFORCE_EQ(
        device_event_.type, TracerEventType::Memset,
        platform::errors::Unavailable(
            "Can not get memset_info, "
            "TracerEventType in node must be TracerEventType::Memset."));
    return device_event_.memset_info;
  }

  // Passes this node to logger->LogDeviceTraceEventNode(). logger is
  // dereferenced without a null check.
  void LogMe(BaseLogger* logger) { logger->LogDeviceTraceEventNode(*this); }

 private:
  // The wrapped event (owned copy).
  DeviceTraceEvent device_event_;
};
// Node that wraps one RuntimeTraceEvent and keeps a list of pointers to the
// device event nodes attached to it through AddDeviceTraceEventNode().
class CudaRuntimeTraceEventNode {
 public:
  // Keeps a private copy of runtime_event.
  explicit CudaRuntimeTraceEventNode(const RuntimeTraceEvent& runtime_event)
      : runtime_event_(runtime_event) {}
  // Declared only; defined out of line.
  ~CudaRuntimeTraceEventNode();

  // ---- read-only views of the wrapped event (returned by value) ----
  std::string name() const { return runtime_event_.name; }
  TracerEventType type() const { return runtime_event_.type; }
  uint64_t start_ns() const { return runtime_event_.start_ns; }
  uint64_t end_ns() const { return runtime_event_.end_ns; }
  uint64_t process_id() const { return runtime_event_.process_id; }
  uint64_t thread_id() const { return runtime_event_.thread_id; }
  // Difference between the end and start timestamps (unsigned arithmetic).
  uint64_t duration() const {
    const uint64_t begin = runtime_event_.start_ns;
    const uint64_t finish = runtime_event_.end_ns;
    return finish - begin;
  }
  uint32_t correlation_id() const { return runtime_event_.correlation_id; }
  uint32_t callback_id() const { return runtime_event_.callback_id; }

  // ---- relations to device nodes ----
  // Appends node to the list of attached device event nodes.
  void AddDeviceTraceEventNode(DeviceTraceEventNode* node) {
    device_node_ptrs_.push_back(node);
  }
  // Gives mutable access to the list of attached device event nodes.
  std::vector<DeviceTraceEventNode*>& GetDeviceTraceEventNodes() {
    return device_node_ptrs_;
  }

  // Passes this node to logger->LogRuntimeTraceEventNode(). logger is
  // dereferenced without a null check.
  void LogMe(BaseLogger* logger) { logger->LogRuntimeTraceEventNode(*this); }

 private:
  // The wrapped event (owned copy).
  RuntimeTraceEvent runtime_event_;
  // Device event nodes attached to this runtime node.
  std::vector<DeviceTraceEventNode*> device_node_ptrs_;
};
// Node that wraps one HostTraceEvent. It also records pointers to child host
// nodes and to CUDA runtime nodes attached through AddChild() and
// AddCudaRuntimeNode().
class HostTraceEventNode {
 public:
  // Keeps a private copy of host_event.
  explicit HostTraceEventNode(const HostTraceEvent& host_event)
      : host_event_(host_event) {}
  // Declared only; defined out of line.
  ~HostTraceEventNode();

  // ---- read-only views of the wrapped event (returned by value) ----
  std::string name() const { return host_event_.name; }
  TracerEventType type() const { return host_event_.type; }
  uint64_t start_ns() const { return host_event_.start_ns; }
  uint64_t end_ns() const { return host_event_.end_ns; }
  uint64_t process_id() const { return host_event_.process_id; }
  uint64_t thread_id() const { return host_event_.thread_id; }
  // Difference between the end and start timestamps (unsigned arithmetic).
  uint64_t duration() const {
    const uint64_t begin = host_event_.start_ns;
    const uint64_t finish = host_event_.end_ns;
    return finish - begin;
  }

  // ---- tree relations ----
  // Appends node to the list of child host nodes.
  void AddChild(HostTraceEventNode* node) { children_.push_back(node); }
  // Appends node to the list of attached CUDA runtime nodes.
  void AddCudaRuntimeNode(CudaRuntimeTraceEventNode* node) {
    runtime_node_ptrs_.push_back(node);
  }
  // Mutable access to the child host nodes.
  std::vector<HostTraceEventNode*>& GetChildren() { return children_; }
  // Mutable access to the attached CUDA runtime nodes.
  std::vector<CudaRuntimeTraceEventNode*>& GetRuntimeTraceEventNodes() {
    return runtime_node_ptrs_;
  }

  // Passes this node to logger->LogHostTraceEventNode(). logger is
  // dereferenced without a null check.
  void LogMe(BaseLogger* logger) { logger->LogHostTraceEventNode(*this); }

 private:
  // The wrapped event (owned copy).
  HostTraceEvent host_event_;
  // CUDA runtime nodes attached to this host node.
  std::vector<CudaRuntimeTraceEventNode*> runtime_node_ptrs_;
  // Child host nodes of this host node.
  std::vector<HostTraceEventNode*> children_;
};
// Holds a map from a uint64_t key to the root HostTraceEventNode of a tree
// (presumably one tree per thread id -- TODO confirm against BuildTrees).
class NodeTrees {
 public:
  // Wraps every host, runtime and device event in a heap-allocated node and
  // passes the three node lists to BuildTrees().
  // NOTE(review): the nodes are created with `new`; where they are released
  // is not visible here (the destructor is defined out of line).
  NodeTrees(const std::list<HostTraceEvent>& host_events,
            const std::list<RuntimeTraceEvent>& runtime_events,
            const std::list<DeviceTraceEvent>& device_events) {
    std::vector<HostTraceEventNode*> host_event_nodes;
    std::vector<CudaRuntimeTraceEventNode*> runtime_event_nodes;
    std::vector<DeviceTraceEventNode*> device_event_nodes;
    // The final sizes are known up front, so allocate once instead of
    // growing the vectors repeatedly while pushing.
    host_event_nodes.reserve(host_events.size());
    runtime_event_nodes.reserve(runtime_events.size());
    device_event_nodes.reserve(device_events.size());
    // encapsulate event into nodes
    for (const auto& host_event : host_events) {
      host_event_nodes.push_back(new HostTraceEventNode(host_event));
    }
    for (const auto& runtime_event : runtime_events) {
      runtime_event_nodes.push_back(
          new CudaRuntimeTraceEventNode(runtime_event));
    }
    for (const auto& device_event : device_events) {
      device_event_nodes.push_back(new DeviceTraceEventNode(device_event));
    }
    // build tree
    BuildTrees(host_event_nodes, runtime_event_nodes, device_event_nodes);
  }
  // Adopts an already built map of trees; the map is copied into this object.
  explicit NodeTrees(
      const std::map<uint64_t, HostTraceEventNode*>& thread_event_trees_map)
      : thread_event_trees_map_(thread_event_trees_map) {}
  // Declared only; defined out of line.
  ~NodeTrees();

  // Declared only; defined out of line.
  void LogMe(BaseLogger* logger);
  // Takes one callback per node kind (host, CUDA runtime, device).
  // Declared only; defined out of line.
  void HandleTrees(std::function<void(HostTraceEventNode*)>,
                   std::function<void(CudaRuntimeTraceEventNode*)>,
                   std::function<void(DeviceTraceEventNode*)>);
  // Returns a copy of the key -> root node map.
  std::map<uint64_t, HostTraceEventNode*> GetNodeTrees() {
    return thread_event_trees_map_;
  }
  // Returns, per map key, a vector of host nodes. The `bfs` flag presumably
  // selects breadth-first vs. depth-first order -- TODO confirm in the
  // definition. Declared only; defined out of line.
  std::map<uint64_t, std::vector<HostTraceEventNode*>> Traverse(bool bfs) const;

 private:
  // Key -> root host node of the corresponding tree.
  std::map<uint64_t, HostTraceEventNode*> thread_event_trees_map_;
  // Helpers used to assemble the trees; declared only, defined out of line.
  void BuildTrees(const std::vector<HostTraceEventNode*>&,
                  std::vector<CudaRuntimeTraceEventNode*>&,
                  const std::vector<DeviceTraceEventNode*>&);
  HostTraceEventNode* BuildTreeRelationship(
      std::vector<HostTraceEventNode*> host_event_nodes,
      std::vector<CudaRuntimeTraceEventNode*> runtime_event_nodes);
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include "paddle/fluid/platform/profiler/event_node.h"
namespace paddle {
namespace platform {
// Plain data record describing one device-side event: name, type, start/end
// timestamps and the device/context/stream ids.
struct DevicePythonNode {
  DevicePythonNode() = default;
  ~DevicePythonNode() = default;

  std::string name;      // record name
  TracerEventType type;  // record type, one of TracerEventType
  uint64_t start_ns;     // start timestamp of the record
  uint64_t end_ns;       // end timestamp of the record
  uint64_t device_id;    // device id
  uint64_t context_id;   // context id
  uint64_t stream_id;    // stream id
};
// Plain data record describing one host-side event, plus pointers to related
// nodes (children, runtime nodes, device nodes).
struct HostPythonNode {
  HostPythonNode() = default;
  // Declared only; defined out of line.
  ~HostPythonNode();

  std::string name;      // record name
  TracerEventType type;  // record type, one of TracerEventType
  uint64_t start_ns;     // start timestamp of the record
  uint64_t end_ns;       // end timestamp of the record
  uint64_t process_id;   // process id of the record
  uint64_t thread_id;    // thread id of the record

  // Child nodes of this record.
  std::vector<HostPythonNode*> children_node_ptrs;
  // Runtime nodes of this record; they use the HostPythonNode type as well.
  std::vector<HostPythonNode*> runtime_node_ptrs;
  // Device nodes of this record.
  std::vector<DevicePythonNode*> device_node_ptrs;
};
// Holds a map from a uint64_t key to a HostPythonNode pointer, together with
// a NodeTrees pointer. How the map is filled is not visible here: the
// NodeTrees constructor, the destructor, Save() and CopyTree() are defined
// out of line.
class ProfilerResult {
 public:
  // Creates an empty result with a null tree pointer.
  ProfilerResult() : tree_(nullptr) {}
  // Declared only; defined out of line.
  explicit ProfilerResult(NodeTrees* tree);
  // Declared only; defined out of line.
  ~ProfilerResult();

  // Returns a copy of the key -> HostPythonNode* map.
  std::map<uint64_t, HostPythonNode*> GetData() {
    return thread_event_trees_map;
  }

  // Declared only; defined out of line.
  void Save(const std::string& file_name);

 private:
  // Key -> HostPythonNode pointer (presumably one root per thread id --
  // TODO confirm in the definition of the NodeTrees constructor).
  std::map<uint64_t, HostPythonNode*> thread_event_trees_map;
  NodeTrees* tree_;
  // Declared only; defined out of line.
  HostPythonNode* CopyTree(HostTraceEventNode* node);
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <ostream>
namespace paddle {
namespace platform {
// Forward declarations: the logger interface only needs these types by
// reference, so their full definitions are not required here.
class DeviceTraceEventNode;
class HostTraceEventNode;
class CudaRuntimeTraceEventNode;
class NodeTrees;

// Base class for loggers. Every hook is virtual with an empty body, so a
// derived logger overrides only the hooks it cares about.
class BaseLogger {
 public:
  BaseLogger() = default;
  virtual ~BaseLogger() = default;

  // Per-node hooks; the base versions do nothing.
  virtual void LogDeviceTraceEventNode(const DeviceTraceEventNode&) {}
  virtual void LogHostTraceEventNode(const HostTraceEventNode&) {}
  virtual void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) {}

  // Hook for a whole NodeTrees object; the base version does nothing.
  virtual void LogNodeTrees(const NodeTrees&) {}
  // Hook without arguments; the base version does nothing.
  virtual void LogMetaInfo() {}
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
namespace paddle {
namespace platform {
// Kinds of records handled by the tracer. The numeric values are spelled out
// explicitly; NumTypes directly follows the last real kind.
enum class TracerEventType {
  Operator = 0,     // operator record
  Dataloader = 1,   // dataloader record
  ProfileStep = 2,  // profile step record
  CudaRuntime = 3,  // cuda runtime record returned by cupti
  Kernel = 4,       // kernel computation record returned by cupti
  Memcpy = 5,       // memcpy record returned by cupti
  Memset = 6,       // memset record returned by cupti
  UserDefined = 7,  // record defined by user
  NumTypes          // not a real kind: denotes the number of kinds above
};
// Detail record for a kernel event (TracerEventType::Kernel).
struct KernelEventInfo {
  // Block size of the kernel in the X, Y and Z dimensions.
  uint32_t block_x;
  uint32_t block_y;
  uint32_t block_z;
  // Grid size of the kernel in the X, Y and Z dimensions.
  uint32_t grid_x;
  uint32_t grid_y;
  uint32_t grid_z;
  // Dynamic shared memory reserved for the kernel, in bytes.
  uint32_t dynamic_shared_memory;
  // Static shared memory allocated for the kernel, in bytes.
  uint32_t static_shared_memory;
  // Number of registers required for each thread executing the kernel.
  uint32_t registers_per_thread;
  // Local memory reserved for each thread, in bytes.
  uint32_t local_memory_per_thread;
  // Total local memory reserved for the kernel, in bytes.
  uint32_t local_memory_total;
  // Timestamp (ns) at which the kernel was queued up in the command buffer.
  // Not collected by default; enable collection with
  // cuptiActivityEnableLatencyTimestamps().
  uint64_t queued;
  // Timestamp (ns) at which the command buffer containing the kernel launch
  // was submitted to the GPU. Not collected by default; enable collection
  // with cuptiActivityEnableLatencyTimestamps().
  uint64_t submitted;
  // Timestamp (ns) at which the kernel execution completed.
  uint64_t completed;
};
// Detail record for a memory copy event (TracerEventType::Memcpy).
struct MemcpyEventInfo {
  // Number of bytes transferred by the copy.
  uint64_t num_bytes;
  // Kind of the copy, as a string. A kind names the source and destination
  // targets of the copy (host, device, array); see CUpti_ActivityMemcpyKind.
  std::string copy_kind;
  // Memory kind of the copy source, as a string. A memory kind names the type
  // of memory accessed by an operation; see CUpti_ActivityMemoryKind.
  std::string src_kind;
  // Memory kind of the copy destination, as a string.
  std::string dst_kind;
};
// Detail record for a memory set event (TracerEventType::Memset).
struct MemsetEventInfo {
  // Number of bytes written by the memory set.
  uint64_t num_bytes;
  // Memory kind of the target, as a string; see CUpti_ActivityMemoryKind.
  std::string memory_kind;
  // Value assigned to the memory by the memory set.
  uint32_t value;
};
// Record of one host-side event.
struct HostTraceEvent {
  // ---- fields (declaration order is also the initialization order) ----
  std::string name;      // record name
  TracerEventType type;  // record type, one of TracerEventType
  uint64_t start_ns;     // start timestamp of the record
  uint64_t end_ns;       // end timestamp of the record
  uint64_t process_id;   // process id of the record
  uint64_t thread_id;    // thread id of the record

  // Leaves the scalar fields without an explicit initial value.
  HostTraceEvent() = default;

  // Fills every field from the argument of the same name.
  HostTraceEvent(const std::string& name, TracerEventType type,
                 uint64_t start_ns, uint64_t end_ns, uint64_t process_id,
                 uint64_t thread_id)
      : name(name),
        type(type),
        start_ns(start_ns),
        end_ns(end_ns),
        process_id(process_id),
        thread_id(thread_id) {}
};
// Record of one CUDA runtime event. The type field defaults to
// TracerEventType::CudaRuntime and is not set by either constructor.
struct RuntimeTraceEvent {
  // ---- fields (declaration order is also the initialization order) ----
  std::string name;  // record name
  // record type, one of TracerEventType
  TracerEventType type{TracerEventType::CudaRuntime};
  uint64_t start_ns;    // start timestamp of the record
  uint64_t end_ns;      // end timestamp of the record
  uint64_t process_id;  // process id of the record
  uint64_t thread_id;   // thread id of the record
  // correlation id, used for correlating async activities happened on device
  uint32_t correlation_id;
  // callback id, used to identify which cuda runtime api is called
  uint32_t callback_id;

  // Leaves the scalar fields other than type without an explicit initial
  // value.
  RuntimeTraceEvent() = default;

  // Fills every field except type from the argument of the same name.
  RuntimeTraceEvent(const std::string& name, uint64_t start_ns, uint64_t end_ns,
                    uint64_t process_id, uint64_t thread_id,
                    uint32_t correlation_id, uint32_t callback_id)
      : name(name),
        start_ns(start_ns),
        end_ns(end_ns),
        process_id(process_id),
        thread_id(thread_id),
        correlation_id(correlation_id),
        callback_id(callback_id) {}
};
// Record of one device-side event (kernel, memcpy or memset).
//
// The three detail records are ordinary members rather than an anonymous
// union. MemcpyEventInfo and MemsetEventInfo contain std::string, and a union
// with such members implicitly deletes the default constructor, the copy
// operations and the destructor of the enclosing struct. That would make
// DeviceTraceEvent impossible to copy, although DeviceTraceEventNode stores a
// copy of it. Only the detail record matching `type` carries meaningful data;
// the other two stay value-initialized.
struct DeviceTraceEvent {
  // Creates an event whose fields are all zero / empty.
  DeviceTraceEvent() = default;

  // Creates an event carrying kernel details.
  DeviceTraceEvent(const std::string& name, TracerEventType type,
                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
                   uint64_t context_id, uint64_t stream_id,
                   uint32_t correlation_id, const KernelEventInfo& kernel_info)
      : name(name),
        type(type),
        start_ns(start_ns),
        end_ns(end_ns),
        device_id(device_id),
        context_id(context_id),
        stream_id(stream_id),
        correlation_id(correlation_id),
        kernel_info(kernel_info) {}

  // Creates an event carrying memcpy details.
  DeviceTraceEvent(const std::string& name, TracerEventType type,
                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
                   uint64_t context_id, uint64_t stream_id,
                   uint32_t correlation_id, const MemcpyEventInfo& memcpy_info)
      : name(name),
        type(type),
        start_ns(start_ns),
        end_ns(end_ns),
        device_id(device_id),
        context_id(context_id),
        stream_id(stream_id),
        correlation_id(correlation_id),
        memcpy_info(memcpy_info) {}

  // Creates an event carrying memset details.
  DeviceTraceEvent(const std::string& name, TracerEventType type,
                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
                   uint64_t context_id, uint64_t stream_id,
                   uint32_t correlation_id, const MemsetEventInfo& memset_info)
      : name(name),
        type(type),
        start_ns(start_ns),
        end_ns(end_ns),
        device_id(device_id),
        context_id(context_id),
        stream_id(stream_id),
        correlation_id(correlation_id),
        memset_info(memset_info) {}

  // record name
  std::string name;
  // record type, one of TracerEventType (zero-valued enumerator until set)
  TracerEventType type{};
  // start timestamp of the record
  uint64_t start_ns{0};
  // end timestamp of the record
  uint64_t end_ns{0};
  // device id
  uint64_t device_id{0};
  // context id
  uint64_t context_id{0};
  // stream id
  uint64_t stream_id{0};
  // correlation id, used for correlating async activities happened on device
  uint32_t correlation_id{0};
  // detail records; only the one matching `type` is meaningful
  // used for TracerEventType::Kernel
  KernelEventInfo kernel_info{};
  // used for TracerEventType::Memcpy
  MemcpyEventInfo memcpy_info{};
  // used for TracerEventType::Memset
  MemsetEventInfo memset_info{};
};
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册