未验证 提交 a8afed69 编写于 作者: L liutiexing 提交者: GitHub

Profiler skeleton (#38826)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* profiler skeleton

* update

* update

* update
Co-authored-by: liutiexing <liutiexing@google.com>
上级 e30150dd
cc_library(workqueue SRCS workqueue.cc workqueue_utils.cc events_waiter.cc DEPS enforce glog)
cc_library(workqueue_utils SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog)
cc_library(workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog)
cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue)
......@@ -198,7 +198,7 @@ std::unique_ptr<WorkQueue> CreateMultiThreadedWorkQueue(
"WorkQueueOptions.num_threads must be "
"greater than 1."));
std::unique_ptr<WorkQueue> ptr(new WorkQueueImpl(options));
return std::move(ptr);
return ptr;
}
std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
......@@ -208,7 +208,7 @@ std::unique_ptr<WorkQueueGroup> CreateWorkQueueGroup(
"For a WorkQueueGroup, the number of WorkQueueOptions "
"must be greater than 1."));
std::unique_ptr<WorkQueueGroup> ptr(new WorkQueueGroupImpl(queues_options));
return std::move(ptr);
return ptr;
}
} // namespace framework
......
......@@ -169,7 +169,8 @@ cc_test(timer_test SRCS timer_test.cc DEPS timer)
cc_library(lodtensor_printer SRCS lodtensor_printer.cc DEPS ddim place tensor scope lod_tensor variable_helper framework_proto)
cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer)
cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
add_subdirectory(profiler)
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
if(WITH_GPU)
nv_library(profiler SRCS profiler.cc profiler.cu DEPS host_event_recorder os_info device_tracer gpu_info enforce dynload_cuda)
......
......@@ -201,39 +201,5 @@ class CudaEvent {
#endif
};
// A single recorded event: a name, a [start_ns, end_ns] pair, a role and an
// optional attribute string. The struct stores only raw pointers to the
// strings; it never frees them.
struct CommonEvent {
 public:
  // Stores `name` as-is (no copy is made). `attr` stays nullptr.
  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}

  // Copies `name_str` and then `attr_str` (in that order) into buffers
  // obtained from `arena_allocator` and points `name` / `attr` at the copies.
  // The allocator is only invoked, never modified, so it is taken by const
  // reference like the overload below; this also lets callers pass const or
  // temporary allocators.
  // NOTE(review): assumes the allocator returns a non-null buffer of at least
  // the requested size — confirm against the arena implementation.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, const std::string &attr_str)
      : start_ns(start_ns), end_ns(end_ns), role(role) {
    name = CopyToArena(arena_allocator, name_str);
    attr = CopyToArena(arena_allocator, attr_str);
  }

  // Copies `name_str` into a buffer obtained from `arena_allocator`.
  // `attr` stays nullptr.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : start_ns(start_ns), end_ns(end_ns), role(role) {
    name = CopyToArena(arena_allocator, name_str);
  }

  const char *name = nullptr;  // not owned, designed for performance
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  EventRole role = EventRole::kOrdinary;
  const char *attr = nullptr;  // not owned, designed for performance

 private:
  // Requests `str.length() + 1` bytes from `arena_allocator`, copies `str`
  // including its terminating NUL into them and returns the buffer.
  static const char *CopyToArena(
      const std::function<void *(size_t)> &arena_allocator,
      const std::string &str) {
    const size_t size_with_nul = str.length() + 1;
    auto buf = static_cast<char *>(arena_allocator(size_with_nul));
    strncpy(buf, str.c_str(), size_with_nul);
    return buf;
  }
};
} // namespace platform
} // namespace paddle
......@@ -20,8 +20,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/host_event_recorder.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
#include "paddle/fluid/platform/profiler_helper.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nvtx.h"
......
......@@ -27,9 +27,9 @@ limitations under the License. */
#include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/fluid/platform/event_tracing.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.pb.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#endif
......
cc_library(host_event_recorder SRCS host_event_recorder.cc DEPS os_info)
......@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/host_event_recorder.h"
#include "paddle/fluid/platform/profiler/host_event_recorder.h"
#include "paddle/fluid/platform/os_info.h"
namespace paddle {
......@@ -26,7 +26,7 @@ HostEventSection HostEventRecorder::GatherEvents() {
for (auto &kv : thread_recorders_) {
host_sec.thr_sections.emplace_back(std::move(kv.second->GatherEvents()));
}
return std::move(host_sec);
return host_sec;
}
} // namespace platform
......
......@@ -25,6 +25,40 @@ limitations under the License. */
namespace paddle {
namespace platform {
// A single recorded event: a name, a [start_ns, end_ns] pair, a role and an
// optional attribute string. The struct stores only raw pointers to the
// strings; it never frees them.
struct CommonEvent {
 public:
  // Stores `name` as-is (no copy is made). `attr` stays nullptr.
  CommonEvent(const char *name, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : name(name), start_ns(start_ns), end_ns(end_ns), role(role) {}

  // Copies `name_str` and then `attr_str` (in that order) into buffers
  // obtained from `arena_allocator` and points `name` / `attr` at the copies.
  // The allocator is only invoked, never modified, so it is taken by const
  // reference like the overload below; this also lets callers pass const or
  // temporary allocators.
  // NOTE(review): assumes the allocator returns a non-null buffer of at least
  // the requested size — confirm against the arena implementation.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role, const std::string &attr_str)
      : start_ns(start_ns), end_ns(end_ns), role(role) {
    name = CopyToArena(arena_allocator, name_str);
    attr = CopyToArena(arena_allocator, attr_str);
  }

  // Copies `name_str` into a buffer obtained from `arena_allocator`.
  // `attr` stays nullptr.
  CommonEvent(const std::function<void *(size_t)> &arena_allocator,
              const std::string &name_str, uint64_t start_ns, uint64_t end_ns,
              EventRole role)
      : start_ns(start_ns), end_ns(end_ns), role(role) {
    name = CopyToArena(arena_allocator, name_str);
  }

  const char *name = nullptr;  // not owned, designed for performance
  uint64_t start_ns = 0;
  uint64_t end_ns = 0;
  EventRole role = EventRole::kOrdinary;
  const char *attr = nullptr;  // not owned, designed for performance

 private:
  // Requests `str.length() + 1` bytes from `arena_allocator`, copies `str`
  // including its terminating NUL into them and returns the buffer.
  static const char *CopyToArena(
      const std::function<void *(size_t)> &arena_allocator,
      const std::string &str) {
    const size_t size_with_nul = str.length() + 1;
    auto buf = static_cast<char *>(arena_allocator(size_with_nul));
    strncpy(buf, str.c_str(), size_with_nul);
    return buf;
  }
};
template <typename HeadType, typename... RestTypes>
struct ContainsStdString
: std::conditional_t<
......@@ -154,7 +188,7 @@ std::vector<EventType> EventContainer<EventType>::Reduce() {
cur = next;
}
event_blocks_ = cur_event_block_ = new EventBlock;
return std::move(all_events);
return all_events;
}
template <typename EventType>
......@@ -204,7 +238,7 @@ class ThreadEventRecorder {
thr_sec.thread_name = thread_name_;
thr_sec.thread_id = thread_id_;
thr_sec.events = std::move(base_evt_cntr_.Reduce());
return std::move(thr_sec);
return thr_sec;
}
private:
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <list>
#include <string>
#include <utility>
namespace paddle {
namespace platform {
// Plain record type stored by TraceEventCollector::AddHostRecord.
// Integral fields are zero-initialized so that a default-constructed record
// never exposes indeterminate values.
struct HostRecord {
  std::string name;
  uint64_t start_ns = 0;  // presumably a nanosecond timestamp, per the suffix
  uint64_t end_ns = 0;    // presumably a nanosecond timestamp, per the suffix
  uint64_t process_id = 0;
  uint64_t thread_id = 0;
};
// Plain record type stored by TraceEventCollector::AddRuntimeRecord.
// Integral fields are zero-initialized so that a default-constructed record
// never exposes indeterminate values.
struct RuntimeRecord {
  std::string name;
  uint64_t start_ns = 0;  // presumably a nanosecond timestamp, per the suffix
  uint64_t end_ns = 0;    // presumably a nanosecond timestamp, per the suffix
  uint64_t process_id = 0;
  uint64_t thread_id = 0;
  // NOTE(review): looks like the key that pairs this record with a
  // DeviceRecord carrying the same correlation_id — confirm with the tracer.
  uint32_t correlation_id = 0;
};
// Plain record type stored by TraceEventCollector::AddDeviceRecord.
// Integral fields are zero-initialized so that a default-constructed record
// never exposes indeterminate values.
struct DeviceRecord {
  std::string name;
  uint64_t start_ns = 0;  // presumably a nanosecond timestamp, per the suffix
  uint64_t end_ns = 0;    // presumably a nanosecond timestamp, per the suffix
  // NOTE(review): looks like the key that pairs this record with a
  // RuntimeRecord carrying the same correlation_id — confirm with the tracer.
  uint32_t correlation_id = 0;
};
// Accumulates host, runtime and device records in three separate lists.
// Each Add* method appends one record to the end of the matching list.
class TraceEventCollector {
 public:
  // Appends `record` to the host record list, moving from it.
  void AddHostRecord(HostRecord&& record) {
    // A named rvalue reference is an lvalue: without std::move the record
    // (including its std::string name) would be copied instead of moved.
    host_records_.push_back(std::move(record));
  }

  // Appends `record` to the runtime record list, moving from it.
  void AddRuntimeRecord(RuntimeRecord&& record) {
    runtime_records_.push_back(std::move(record));
  }

  // Appends `record` to the device record list, moving from it.
  void AddDeviceRecord(DeviceRecord&& record) {
    device_records_.push_back(std::move(record));
  }

 private:
  std::list<HostRecord> host_records_;
  std::list<RuntimeRecord> runtime_records_;
  std::list<DeviceRecord> device_records_;
};
} // namespace platform
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/profiler/trace_event_collector.h"
namespace paddle {
namespace platform {
// Abstract interface for a tracer. Subclasses must implement StartTracing,
// StopTracing and CollectTraceData; PrepareTracing has a default
// implementation that only advances the state to READY.
class TracerBase {
 public:
  // The state machine for a Tracer.
  // (Enumerator spelling, including STOPED, is part of the interface.)
  enum class TracerState {
    UNINITED,
    READY,
    STARTED,
    STOPED
  };

  // Default preparation step: marks the tracer as READY.
  virtual void PrepareTracing() { state_ = TracerState::READY; }

  // Implemented by subclasses; no state transition is performed here.
  virtual void StartTracing() = 0;
  virtual void StopTracing() = 0;

  // Implemented by subclasses; receives the collector to hand trace data to.
  virtual void CollectTraceData(TraceEventCollector* collector) = 0;

  virtual ~TracerBase() = default;

 protected:
  // Current state; starts as UNINITED.
  TracerState state_{TracerState::UNINITED};
};
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册