Profiler skeleton (#38826)

* add align for WorkQueue * add spinlock * merge develop * merge * Add EventsWaiter * Revert "Add EventsWaiter" This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2. * profiler skeleton * update * update * update Co-authored-by: N liutiexing <liutiexing@google.com>

Profiler skeleton (#38826)
* add align for WorkQueue * add spinlock * merge develop * merge * Add EventsWaiter * Revert "Add EventsWaiter" This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2. * profiler skeleton * update * update * update Co-authored-by: N liutiexing <liutiexing@google.com>
a8afed69 · liutiexing · GitHub · e30150dd · a8afed69 · a8afed69
12 changed files
--- a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
+++ b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
-cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc events_waiter.cc DEPS enforce glog)
+cc_library(workqueue_utils SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog)
+cc_library(workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog)
 cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue)
--- a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc
+++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc
@@ -198,7 +198,7 @@ std::unique_ptr<WorkQueue> CreateMultiThreadedWorkQueue(
                                        "WorkQueueOptions.num_threads must be "
                                        "greater than 1."));
  std::unique_ptr<WorkQueue> ptr(new WorkQueueImpl(options));
-  return std::move(ptr);
+  return ptr;
 }
 std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
@@ -208,7 +208,7 @@ std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
                        "For a WorkQueueGroup, the number of WorkQueueOptions "
                        "must be greater than 1."));
  std::unique_ptr<WorkQueueGroup> ptr(new WorkQueueGroupImpl(queues_options));
-  return std::move(ptr);
+  return ptr;
 }
 }  // namespace framework

--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -169,7 +169,8 @@ cc_test(timer_test SRCS timer_test.cc DEPS timer)
 cc_library(lodtensor_printer SRCS lodtensor_printer.cc DEPS ddim place tensor scope lod_tensor variable_helper framework_proto)
 cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer)
-cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
+add_subdirectory(profiler)
 cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
 if(WITH_GPU)
  nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda)

--- a/paddle/fluid/platform/event.h
+++ b/paddle/fluid/platform/event.h
@@ -201,39 +201,5 @@ class CudaEvent {
 #endif
 };
-struct CommonEvent {
- public:
-  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
-              EventRole role)
-      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}
-  CommonEvent(std::function<void *(size_t)> &arena_allocator,
-              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
-              EventRole role, const std::string &attr_str)
-      : start_ns(start_ns), end_ns(end_ns), role(role) {
-    auto buf = static_cast<char *>(arena_allocator(name_str.length() + 1));
-    strncpy(buf, name_str.c_str(), name_str.length() + 1);
-    name = buf;
-    buf = static_cast<char *>(arena_allocator(attr_str.length() + 1));
-    strncpy(buf, attr_str.c_str(), attr_str.length() + 1);
-    attr = buf;
-  }
-  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
-              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
-              EventRole role)
-      : start_ns(start_ns), end_ns(end_ns), role(role) {
-    auto buf = static_cast<char *>(arena_allocator(name_str.length() + 1));
-    strncpy(buf, name_str.c_str(), name_str.length() + 1);
-    name = buf;
-  }
-  const char *name = nullptr;  // not owned, designed for performance
-  uint64_t start_ns = 0;
-  uint64_t end_ns = 0;
-  EventRole role = EventRole::kOrdinary;
-  const char *attr = nullptr;  // not owned, designed for performance
-};
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -20,8 +20,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/device_tracer.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/host_event_recorder.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/host_event_recorder.h"
 #include "paddle/fluid/platform/profiler_helper.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/dynload/nvtx.h"

--- a/paddle/fluid/platform/profiler.h
+++ b/paddle/fluid/platform/profiler.h
@@ -27,9 +27,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/event.h"
-#include "paddle/fluid/platform/event_tracing.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.pb.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #endif

--- a/paddle/fluid/platform/profiler/CMakeLists.txt
+++ b/paddle/fluid/platform/profiler/CMakeLists.txt
+cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
--- a/paddle/fluid/platform/event_tracing.h
+++ b/paddle/fluid/platform/event_tracing.h
--- a/paddle/fluid/platform/host_event_recorder.cc
+++ b/paddle/fluid/platform/host_event_recorder.cc
@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/platform/host_event_recorder.h"
+#include "paddle/fluid/platform/profiler/host_event_recorder.h"
 #include "paddle/fluid/platform/os_info.h"
 namespace paddle {
@@ -26,7 +26,7 @@ HostEventSection HostEventRecorder::GatherEvents() {
  for (auto &kv : thread_recorders_) {
    host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents()));
  }
-  return std::move(host_sec);
+  return host_sec;
 }
 }  // namespace platform

--- a/paddle/fluid/platform/host_event_recorder.h
+++ b/paddle/fluid/platform/host_event_recorder.h
@@ -25,6 +25,40 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
+// A single host-side event: a name, a [start_ns, end_ns] interval, a role and
+// an optional attribute string. The struct never owns the memory behind
+// `name` / `attr`; it only stores the pointers.
+struct CommonEvent {
+ public:
+  // Stores `name` as-is (no copy is made), so the pointed-to string must
+  // outlive this event.
+  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
+              EventRole role)
+      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}
+
+  // Copies `name_str` and then `attr_str` (each with its terminating NUL)
+  // into buffers obtained from `arena_allocator`, and points `name` / `attr`
+  // at those copies. The allocator is only invoked, never modified, so it is
+  // taken by const reference like the overload below.
+  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
+              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
+              EventRole role, const std::string &attr_str)
+      : name(CopyToArena(arena_allocator, name_str)),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        role(role),
+        attr(CopyToArena(arena_allocator, attr_str)) {}
+
+  // Same as above but without an attribute string; `attr` stays nullptr.
+  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
+              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
+              EventRole role)
+      : name(CopyToArena(arena_allocator, name_str)),
+        start_ns(start_ns),
+        end_ns(end_ns),
+        role(role) {}
+
+  const char *name = nullptr;  // not owned, designed for performance
+  uint64_t start_ns = 0;
+  uint64_t end_ns = 0;
+  EventRole role = EventRole::kOrdinary;
+  const char *attr = nullptr;  // not owned, designed for performance
+
+ private:
+  // Requests `str.length() + 1` bytes from `arena_allocator`, copies `str`
+  // including its terminating NUL into them and returns the buffer.
+  static const char *CopyToArena(
+      const std::function<void *(size_t)> &arena_allocator,
+      const std::string &str) {
+    const size_t size = str.length() + 1;
+    auto buf = static_cast<char *>(arena_allocator(size));
+    strncpy(buf, str.c_str(), size);
+    return buf;
+  }
+};
 template <typename HeadType, typename... RestTypes>
 struct ContainsStdString
    : std::conditional_t<
@@ -154,7 +188,7 @@ std::vector<EventType> EventContainer<EventType>::Reduce() {
    cur = next;
  }
  event_blocks_ = cur_event_block_ = new EventBlock;
-  return std::move(all_events);
+  return all_events;
 }
 template <typename EventType>
@@ -204,7 +238,7 @@ class ThreadEventRecorder {
    thr_sec.thread_name = thread_name_;
    thr_sec.thread_id = thread_id_;
    thr_sec.events = std::move(base_evt_cntr_.Reduce());
-    return std::move(thr_sec);
+    return thr_sec;
  }
 private:

--- a/paddle/fluid/platform/profiler/trace_event_collector.h
+++ b/paddle/fluid/platform/profiler/trace_event_collector.h
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <cstdint>
+#include <list>
+#include <string>
+#include <utility>
+namespace paddle {
+namespace platform {
+struct HostRecord {  // Plain record handed to TraceEventCollector::AddHostRecord.
+  std::string name;  // record name (owned copy)
+  uint64_t start_ns;  // presumably the start timestamp in nanoseconds (per the _ns suffix) - TODO confirm clock/epoch
+  uint64_t end_ns;  // presumably the end timestamp in nanoseconds - TODO confirm
+  uint64_t process_id;  // presumably the id of the originating process - confirm against producer
+  uint64_t thread_id;  // presumably the id of the originating thread - confirm against producer
+};
+struct RuntimeRecord {  // Plain record handed to TraceEventCollector::AddRuntimeRecord.
+  std::string name;  // record name (owned copy)
+  uint64_t start_ns;  // presumably the start timestamp in nanoseconds (per the _ns suffix) - TODO confirm clock/epoch
+  uint64_t end_ns;  // presumably the end timestamp in nanoseconds - TODO confirm
+  uint64_t process_id;  // presumably the id of the originating process - confirm against producer
+  uint64_t thread_id;  // presumably the id of the originating thread - confirm against producer
+  uint32_t correlation_id;  // NOTE(review): looks like the key matching DeviceRecord::correlation_id - confirm
+};
+struct DeviceRecord {  // Plain record handed to TraceEventCollector::AddDeviceRecord.
+  std::string name;  // record name (owned copy)
+  uint64_t start_ns;  // presumably the start timestamp in nanoseconds (per the _ns suffix) - TODO confirm clock/epoch
+  uint64_t end_ns;  // presumably the end timestamp in nanoseconds - TODO confirm
+  uint32_t correlation_id;  // NOTE(review): looks like the key matching RuntimeRecord::correlation_id - confirm
+};
+// Accumulates the records handed to it, keeping host, runtime and device
+// records in three separate lists, each in insertion order.
+class TraceEventCollector {
+ public:
+  // Appends `record` to the host list. A named rvalue reference is an lvalue
+  // inside the function, so it has to be std::move'd explicitly; otherwise
+  // push_back would copy the record (including its std::string).
+  void AddHostRecord(HostRecord&& record) {
+    host_records_.push_back(std::move(record));
+  }
+
+  // Appends `record` to the runtime list, moving from it.
+  void AddRuntimeRecord(RuntimeRecord&& record) {
+    runtime_records_.push_back(std::move(record));
+  }
+
+  // Appends `record` to the device list, moving from it.
+  void AddDeviceRecord(DeviceRecord&& record) {
+    device_records_.push_back(std::move(record));
+  }
+
+ private:
+  std::list<HostRecord> host_records_;
+  std::list<RuntimeRecord> runtime_records_;
+  std::list<DeviceRecord> device_records_;
+};
+}  // namespace platform
+}  // namespace paddle
--- a/paddle/fluid/platform/profiler/tracer_base.h
+++ b/paddle/fluid/platform/profiler/tracer_base.h
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "paddle/fluid/platform/profiler/trace_event_collector.h"
+namespace paddle {
+namespace platform {
+// Abstract base for tracers. Derived classes implement starting, stopping and
+// handing collected data to a TraceEventCollector; the base only stores the
+// current state.
+class TracerBase {
+ public:
+  // The states a Tracer can be in.
+  enum class TracerState {
+    UNINITED,
+    READY,
+    STARTED,
+    STOPED
+  };
+
+  // Default preparation step: only marks the tracer as READY.
+  virtual void PrepareTracing() {
+    state_ = TracerState::READY;
+  }
+
+  // Implemented by derived tracers.
+  virtual void StartTracing() = 0;
+
+  // Implemented by derived tracers.
+  virtual void StopTracing() = 0;
+
+  // Implemented by derived tracers; receives the collector to work with.
+  virtual void CollectTraceData(TraceEventCollector* collector) = 0;
+
+  virtual ~TracerBase() = default;
+
+ protected:
+  // Current state; a freshly constructed tracer is UNINITED.
+  TracerState state_{TracerState::UNINITED};
+};
+}  // namespace platform
+}  // namespace paddle