From 57b2033be6d255a250791aff1bc71a7cc429728d Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 25 Jan 2022 19:08:42 +0800 Subject: [PATCH] add trace event data structure definition (#39109) * add trace event data structure definition * convert enum item to string for cupti enum explanation * modify paddle_enforce_eq description --- paddle/fluid/platform/profiler/event_node.h | 207 ++++++++++++++++ paddle/fluid/platform/profiler/event_python.h | 83 +++++++ .../fluid/platform/profiler/output_logger.h | 40 +++ paddle/fluid/platform/profiler/trace_event.h | 228 ++++++++++++++++++ 4 files changed, 558 insertions(+) create mode 100755 paddle/fluid/platform/profiler/event_node.h create mode 100755 paddle/fluid/platform/profiler/event_python.h create mode 100755 paddle/fluid/platform/profiler/output_logger.h create mode 100644 paddle/fluid/platform/profiler/trace_event.h diff --git a/paddle/fluid/platform/profiler/event_node.h b/paddle/fluid/platform/profiler/event_node.h new file mode 100755 index 00000000000..05190bc4666 --- /dev/null +++ b/paddle/fluid/platform/profiler/event_node.h @@ -0,0 +1,207 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/profiler/output_logger.h"
+#include "paddle/fluid/platform/profiler/trace_event.h"
+
+namespace paddle {
+namespace platform {
+
+class DeviceTraceEventNode {
+ public:
+  // Keeps its own copy of device_event.
+  explicit DeviceTraceEventNode(const DeviceTraceEvent& device_event)
+      : device_event_(device_event) {}
+  // Nothing to release; the node only holds the copied event.
+  ~DeviceTraceEventNode() {}
+  // Accessors. The *_info() ones enforce that type() matches the payload.
+  std::string name() const { return device_event_.name; }
+  TracerEventType type() const { return device_event_.type; }
+  uint64_t start_ns() const { return device_event_.start_ns; }
+  uint64_t end_ns() const { return device_event_.end_ns; }
+  uint64_t device_id() const { return device_event_.device_id; }
+  uint64_t context_id() const { return device_event_.context_id; }
+  uint64_t stream_id() const { return device_event_.stream_id; }
+  uint64_t duration() const {
+    return device_event_.end_ns - device_event_.start_ns;
+  }
+  uint32_t correlation_id() const { return device_event_.correlation_id; }
+  KernelEventInfo kernel_info() const {
+    PADDLE_ENFORCE_EQ(
+        device_event_.type, TracerEventType::Kernel,
+        platform::errors::Unavailable(
+            "Can not get kernel_info, "
+            "TracerEventType in node must be TracerEventType::Kernel."));
+    return device_event_.kernel_info;
+  }
+  MemcpyEventInfo memcpy_info() const {
+    PADDLE_ENFORCE_EQ(
+        device_event_.type, TracerEventType::Memcpy,
+        platform::errors::Unavailable(
+            "Can not get memcpy_info, "
+            "TracerEventType in node must be TracerEventType::Memcpy."));
+    return device_event_.memcpy_info;
+  }
+  MemsetEventInfo memset_info() const {
+    PADDLE_ENFORCE_EQ(
+        device_event_.type, TracerEventType::Memset,
+        platform::errors::Unavailable(
+            "Can not get memset_info, "
+            "TracerEventType in node must be TracerEventType::Memset."));
+    return device_event_.memset_info;
+  }
+
+  // Passes this node to logger->LogDeviceTraceEventNode().
+  void LogMe(BaseLogger* logger) { logger->LogDeviceTraceEventNode(*this); }
+
+ private:
+  // the wrapped device event, held by value
+  DeviceTraceEvent device_event_;
+};
+
+class CudaRuntimeTraceEventNode {
+ public:
+  // Keeps its own copy of runtime_event.
+  explicit CudaRuntimeTraceEventNode(const RuntimeTraceEvent& runtime_event)
+      : runtime_event_(runtime_event) {}
+  // Declared here; the definition is not part of this header.
+  ~CudaRuntimeTraceEventNode();
+  // Accessors forwarding to the wrapped runtime event.
+  std::string name() const { return runtime_event_.name; }
+  TracerEventType type() const { return runtime_event_.type; }
+  uint64_t start_ns() const { return runtime_event_.start_ns; }
+  uint64_t end_ns() const { return runtime_event_.end_ns; }
+  uint64_t process_id() const { return runtime_event_.process_id; }
+  uint64_t thread_id() const { return runtime_event_.thread_id; }
+  uint64_t duration() const {
+    return runtime_event_.end_ns - runtime_event_.start_ns;
+  }
+  uint32_t correlation_id() const { return runtime_event_.correlation_id; }
+  uint32_t callback_id() const { return runtime_event_.callback_id; }
+  // Tree-building and logging helpers; node pointers are stored as given.
+  void AddDeviceTraceEventNode(DeviceTraceEventNode* node) {
+    device_node_ptrs_.push_back(node);
+  }
+  void LogMe(BaseLogger* logger) { logger->LogRuntimeTraceEventNode(*this); }
+  std::vector& GetDeviceTraceEventNodes() {
+    return device_node_ptrs_;
+  }
+
+ private:
+  // the wrapped runtime event, held by value
+  RuntimeTraceEvent runtime_event_;
+  // device event nodes attached through AddDeviceTraceEventNode()
+  std::vector device_node_ptrs_;
+};
+
+class HostTraceEventNode {
+ public:
+  // Keeps its own copy of host_event.
+  explicit HostTraceEventNode(const HostTraceEvent& host_event)
+      : host_event_(host_event) {}
+
+  // Declared here; the definition is not part of this header.
+  ~HostTraceEventNode();
+
+  // Accessors forwarding to the wrapped host event.
+  std::string name() const { return host_event_.name; }
+  TracerEventType type() const { return host_event_.type; }
+  uint64_t start_ns() const { return host_event_.start_ns; }
+  uint64_t end_ns() const { return host_event_.end_ns; }
+  uint64_t process_id() const { return host_event_.process_id; }
+  uint64_t thread_id() const { return host_event_.thread_id; }
+  uint64_t duration() const {
+    return host_event_.end_ns - host_event_.start_ns;
+  }
+
+  // Tree-building and logging helpers; node pointers are stored as given.
+  void AddChild(HostTraceEventNode* node) { children_.push_back(node); }
+  void AddCudaRuntimeNode(CudaRuntimeTraceEventNode* node) {
+    runtime_node_ptrs_.push_back(node);
+  }
+  std::vector& GetChildren() { return children_; }
+  std::vector& GetRuntimeTraceEventNodes() {
+    return runtime_node_ptrs_;
+  }
+  void LogMe(BaseLogger* logger) { logger->LogHostTraceEventNode(*this); }
+
+ private:
+  // the wrapped host event, held by value
+  HostTraceEvent host_event_;
+  // cuda runtime event nodes attached through AddCudaRuntimeNode()
+  std::vector runtime_node_ptrs_;
+  // child host event nodes attached through AddChild()
+  std::vector children_;
+};
+
+class NodeTrees {
+ public:
+  // Wraps every event in a heap-allocated node and hands them to BuildTrees().
+  NodeTrees(const std::list& host_events,
+            const std::list& runtime_events,
+            const std::list& device_events) {
+    std::vector host_event_nodes;
+    std::vector runtime_event_nodes;
+    std::vector device_event_nodes;
+    // wrap each event in a node allocated with new
+    for (const auto& event : host_events) {
+      host_event_nodes.push_back(new HostTraceEventNode(event));
+    }
+    for (const auto& event : runtime_events) {
+      runtime_event_nodes.push_back(new CudaRuntimeTraceEventNode(event));
+    }
+    for (const auto& event : device_events) {
+      device_event_nodes.push_back(new DeviceTraceEventNode(event));
+    }
+    // link the nodes into trees
+    BuildTrees(host_event_nodes, runtime_event_nodes, device_event_nodes);
+  }
+
+  explicit NodeTrees(
+      const std::map& thread_event_trees_map)
+      : thread_event_trees_map_(thread_event_trees_map) {}
+
+  // Declared here; the definition is not part of this header.
+  ~NodeTrees();
+
+  void LogMe(BaseLogger* logger);
+  void HandleTrees(std::function,
+                   std::function,
+                   std::function);
+  std::map GetNodeTrees() {
+    return thread_event_trees_map_;
+  }
+  std::map> Traverse(bool bfs) const;
+
+ private:
+  std::map thread_event_trees_map_;
+  void BuildTrees(const std::vector&,
+                  std::vector&,
+                  const std::vector&);
+  HostTraceEventNode* BuildTreeRelationship(
+      std::vector host_event_nodes,
+      std::vector runtime_event_nodes);
+};
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/profiler/event_python.h b/paddle/fluid/platform/profiler/event_python.h
new file mode 100755
index 00000000000..2241cf9e49e
--- /dev/null
+++ b/paddle/fluid/platform/profiler/event_python.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include
+
+#include "paddle/fluid/platform/profiler/event_node.h"
+
+namespace paddle {
+namespace platform {
+
+struct DevicePythonNode {
+  DevicePythonNode() = default;
+  ~DevicePythonNode() {}
+  // record name
+  std::string name;
+  // record type, one of TracerEventType
+  TracerEventType type;
+  // start timestamp of the record
+  uint64_t start_ns;
+  // end timestamp of the record
+  uint64_t end_ns;
+  // device id
+  uint64_t device_id;
+  // context id
+  uint64_t context_id;
+  // stream id
+  uint64_t stream_id;
+};
+
+struct HostPythonNode {
+  HostPythonNode() = default;
+  ~HostPythonNode();
+  // record name
+  std::string name;
+  // record type, one of TracerEventType
+  TracerEventType type;
+  // start timestamp of the record
+  uint64_t start_ns;
+  // end timestamp of the record
+  uint64_t end_ns;
+  // process id of the record
+  uint64_t process_id;
+  // thread id of the record
+  uint64_t thread_id;
+  // children node
+  std::vector children_node_ptrs;
+  // runtime node
+  std::vector runtime_node_ptrs;
+  // device node
+  std::vector device_node_ptrs;
+};
+
+class ProfilerResult {
+ public:
+  ProfilerResult() : tree_(nullptr) {}
+  explicit ProfilerResult(NodeTrees* tree);
+  ~ProfilerResult();
+  std::map GetData() {
+    return thread_event_trees_map;
+  }
+  void Save(const std::string& file_name);
+
+ private:
+  std::map thread_event_trees_map;
+  NodeTrees* tree_;
+  HostPythonNode* CopyTree(HostTraceEventNode* node);
+};
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/profiler/output_logger.h b/paddle/fluid/platform/profiler/output_logger.h
new file mode 100755
index 00000000000..6901ed0c444
--- /dev/null
+++ b/paddle/fluid/platform/profiler/output_logger.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#pragma once
+
+#include
+#include
+
+namespace paddle {
+namespace platform {
+
+class DeviceTraceEventNode;       // only named by reference below
+class HostTraceEventNode;         // only named by reference below
+class CudaRuntimeTraceEventNode;  // only named by reference below
+class NodeTrees;                  // only named by reference below
+
+class BaseLogger {
+ public:
+  BaseLogger() = default;
+  virtual ~BaseLogger() = default;
+  virtual void LogDeviceTraceEventNode(const DeviceTraceEventNode&) {}
+  virtual void LogHostTraceEventNode(const HostTraceEventNode&) {}
+  virtual void LogRuntimeTraceEventNode(const CudaRuntimeTraceEventNode&) {}
+  virtual void LogNodeTrees(const NodeTrees&) {}
+  virtual void LogMetaInfo() {}
+};
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/profiler/trace_event.h b/paddle/fluid/platform/profiler/trace_event.h
new file mode 100644
index 00000000000..e676942c458
--- /dev/null
+++ b/paddle/fluid/platform/profiler/trace_event.h
@@ -0,0 +1,228 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#pragma once
+
+#include
+
+namespace paddle {
+namespace platform {
+
+enum class TracerEventType {
+  // Used to mark operator record
+  Operator = 0,
+  // Used to mark dataloader record
+  Dataloader = 1,
+  // Used to mark profile step record
+  ProfileStep = 2,
+  // Used to mark cuda runtime record returned by cupti
+  CudaRuntime = 3,
+  // Used to mark kernel computation record returned by cupti
+  Kernel = 4,
+  // Used to mark memcpy record returned by cupti
+  Memcpy = 5,
+  // Used to mark memset record returned by cupti
+  Memset = 6,
+  // Used to mark record defined by user
+  UserDefined = 7,
+  // A flag to denote the number of current types
+  NumTypes
+};
+
+struct KernelEventInfo {
+  // The X-dimension block size for the kernel.
+  uint32_t block_x;
+  // The Y-dimension block size for the kernel.
+  uint32_t block_y;
+  // The Z-dimension block size for the kernel.
+  uint32_t block_z;
+  // X-dimension of a grid.
+  uint32_t grid_x;
+  // Y-dimension of a grid.
+  uint32_t grid_y;
+  // Z-dimension of a grid.
+  uint32_t grid_z;
+  // The dynamic shared memory reserved for the kernel, in bytes.
+  uint32_t dynamic_shared_memory;
+  // The static shared memory allocated for the kernel, in bytes.
+  uint32_t static_shared_memory;
+  // The number of registers required for each thread executing the kernel.
+  uint32_t registers_per_thread;
+  // The amount of local memory reserved for each thread, in bytes.
+  uint32_t local_memory_per_thread;
+  // The total amount of local memory reserved for the kernel, in bytes.
+  uint32_t local_memory_total;
+  // The timestamp when the kernel is queued up in the command buffer, in ns.
+  // This timestamp is not collected by default. Use API
+  // cuptiActivityEnableLatencyTimestamps() to enable collection.
+  uint64_t queued;
+  // The timestamp when the command buffer containing the kernel launch is
+  // submitted to the GPU, in ns.
+  // This timestamp is not collected by default. Use API
+  // cuptiActivityEnableLatencyTimestamps() to enable collection.
+  uint64_t submitted;
+  // The completed timestamp for the kernel execution, in ns.
+  uint64_t completed;
+};
+
+struct MemcpyEventInfo {
+  // The number of bytes transferred by the memory copy.
+  uint64_t num_bytes;
+  // The kind of the memory copy.
+  // Each kind represents the source and destination targets of a memory copy.
+  // Targets are host, device, and array. Refer to CUpti_ActivityMemcpyKind
+  std::string copy_kind;
+  // The source memory kind read by the memory copy.
+  // Each kind represents the type of the memory accessed by a memory
+  // operation/copy. Refer to CUpti_ActivityMemoryKind
+  std::string src_kind;
+  // The destination memory kind read by the memory copy.
+  std::string dst_kind;
+};
+
+struct MemsetEventInfo {
+  // The number of bytes being set by the memory set.
+  uint64_t num_bytes;
+  // The memory kind of the memory set. Refer to CUpti_ActivityMemoryKind
+  std::string memory_kind;
+  // The value being assigned to memory by the memory set.
+  uint32_t value;
+};
+
+struct HostTraceEvent {
+  HostTraceEvent() = default;
+  HostTraceEvent(const std::string& name, TracerEventType type,
+                 uint64_t start_ns, uint64_t end_ns, uint64_t process_id,
+                 uint64_t thread_id)
+      : name(name),
+        type(type),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        process_id(process_id),
+        thread_id(thread_id) {}
+  // record name
+  std::string name;
+  // record type, one of TracerEventType
+  TracerEventType type;
+  // start timestamp of the record
+  uint64_t start_ns;
+  // end timestamp of the record
+  uint64_t end_ns;
+  // process id of the record
+  uint64_t process_id;
+  // thread id of the record
+  uint64_t thread_id;
+};
+
+struct RuntimeTraceEvent {
+  RuntimeTraceEvent() = default;
+  RuntimeTraceEvent(const std::string& name, uint64_t start_ns, uint64_t end_ns,
+                    uint64_t process_id, uint64_t thread_id,
+                    uint32_t correlation_id, uint32_t callback_id)
+      : name(name),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        process_id(process_id),
+        thread_id(thread_id),
+        correlation_id(correlation_id),
+        callback_id(callback_id) {}
+
+  // record name
+  std::string name;
+  // record type, one of TracerEventType
+  TracerEventType type{TracerEventType::CudaRuntime};
+  // start timestamp of the record
+  uint64_t start_ns;
+  // end timestamp of the record
+  uint64_t end_ns;
+  // process id of the record
+  uint64_t process_id;
+  // thread id of the record
+  uint64_t thread_id;
+  // correlation id, used for correlating async activities happened on device
+  uint32_t correlation_id;
+  // callback id, used to identify which cuda runtime api is called
+  uint32_t callback_id;
+};
+
+struct DeviceTraceEvent {
+  DeviceTraceEvent() = default;
+  DeviceTraceEvent(const std::string& name, TracerEventType type,
+                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
+                   uint64_t context_id, uint64_t stream_id,
+                   uint32_t correlation_id, const KernelEventInfo& kernel_info)
+      : name(name),
+        type(type),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        device_id(device_id),
+        context_id(context_id),
+        stream_id(stream_id),
+        correlation_id(correlation_id),
+        kernel_info(kernel_info) {}
+  DeviceTraceEvent(const std::string& name, TracerEventType type,
+                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
+                   uint64_t context_id, uint64_t stream_id,
+                   uint32_t correlation_id, const MemcpyEventInfo& memcpy_info)
+      : name(name),
+        type(type),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        device_id(device_id),
+        context_id(context_id),
+        stream_id(stream_id),
+        correlation_id(correlation_id),
+        memcpy_info(memcpy_info) {}
+  DeviceTraceEvent(const std::string& name, TracerEventType type,
+                   uint64_t start_ns, uint64_t end_ns, uint64_t device_id,
+                   uint64_t context_id, uint64_t stream_id,
+                   uint32_t correlation_id, const MemsetEventInfo& memset_info)
+      : name(name),
+        type(type),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        device_id(device_id),
+        context_id(context_id),
+        stream_id(stream_id),
+        correlation_id(correlation_id),
+        memset_info(memset_info) {}
+  // record name
+  std::string name;
+  // record type, one of TracerEventType
+  TracerEventType type;
+  // start timestamp of the record
+  uint64_t start_ns;
+  // end timestamp of the record
+  uint64_t end_ns;
+  // device id
+  uint64_t device_id;
+  // context id
+  uint64_t context_id;
+  // stream id
+  uint64_t stream_id;
+  // correlation id, used for correlating async activities happened on device
+  uint32_t correlation_id;
+  // Type-specific details; only the member matching `type` is meaningful.
+  // Kept as separate members, not a union: the std::string fields of the
+  // memcpy/memset infos would delete a union's copy operations/destructor.
+  // used for TracerEventType::Kernel
+  KernelEventInfo kernel_info{};
+  // used for TracerEventType::Memcpy
+  MemcpyEventInfo memcpy_info{};
+  // used for TracerEventType::Memset
+  MemsetEventInfo memset_info{};
+};
+
+}  // namespace platform
+}  // namespace paddle
-- GitLab