Unverified commit 7ecefec3, authored by liutiexing, committed by GitHub

Profile Executor (#39641)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* add log for Executor

* Profile Allocators

* Profile Allocators

* adjust interface

* remove lock for set

* fix

Co-authored-by: liutiexing <liutiexing@google.com>
Parent bc99a76c
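The hunks below instrument WorkQueue::AddTask and the allocator fast paths with platform::RecordEvent, a scoped RAII trace event: constructing it opens a profiling region at the given level (10 for the work queue, 9 for the allocators) and its destructor closes the region. A minimal standalone sketch of that pattern, assuming nothing about Paddle's internals (ScopedTraceEvent and the chrono-based timing below are illustrative only):

```cpp
#include <chrono>
#include <iostream>
#include <string>

// Illustrative stand-in for a scoped trace event: the constructor marks the
// start of a profiled region and the destructor marks its end, so the event
// covers exactly the enclosing scope.
class ScopedTraceEvent {
 public:
  ScopedTraceEvent(std::string name, int level)
      : name_(std::move(name)),
        level_(level),
        start_(std::chrono::steady_clock::now()) {}

  ~ScopedTraceEvent() {
    auto end = std::chrono::steady_clock::now();
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start_)
                  .count();
    // A real tracer would hand this record to a profiler backend instead.
    std::cout << name_ << " (level " << level_ << "): " << us << " us\n";
  }

 private:
  std::string name_;
  int level_;
  std::chrono::steady_clock::time_point start_;
};

void AddTaskLikeFunction() {
  ScopedTraceEvent event("WorkQueue::AddTask", 10 /*level*/);
  // ... enqueue work here; the event ends when this scope exits ...
}
```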
@@ -44,7 +44,6 @@ class ThreadDataRegistry {
template <typename Alias = T,
typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>>
void SetCurrentThreadData(const T& val) {
-    std::lock_guard<std::mutex> lock(lock_);
CurrentThreadData() = val;
}
......
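The hunk above drops the mutex from SetCurrentThreadData, presumably because CurrentThreadData() resolves to a slot owned by the calling thread, so the write never races with another thread's write; only cross-thread snapshots of the registry would still need locking. A hedged sketch of that idea (TinyThreadDataRegistry is an illustration, not Paddle's ThreadDataRegistry, and it omits the guarded map that cross-thread readers would still use):

```cpp
#include <string>

// Illustrative thread-local registry: each thread owns its slot, so writing
// the current thread's value needs no lock. A real registry would also keep
// a mutex-protected map of all threads' slots for cross-thread snapshots.
template <typename T>
class TinyThreadDataRegistry {
 public:
  // No mutex: only the calling thread ever writes its own slot.
  void SetCurrentThreadData(const T& val) { CurrentThreadData() = val; }

  const T& GetCurrentThreadData() { return CurrentThreadData(); }

 private:
  static T& CurrentThreadData() {
    // One instance per thread, created on that thread's first use.
    static thread_local T data{};
    return data;
  }
};

// Usage: every thread sees and mutates only its own value.
// TinyThreadDataRegistry<std::string> reg;
// reg.SetCurrentThreadData("worker-0");
```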
@@ -8,6 +8,7 @@
#include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h"
#include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace framework {
@@ -61,6 +62,8 @@ class WorkQueueImpl : public WorkQueue {
}
void AddTask(std::function<void()> fn) override {
platform::RecordEvent("WorkQueue::AddTask",
platform::TracerEventType::UserDefined, 10 /*level*/);
if (tracker_ != nullptr) {
fn = [
task = std::move(fn), raii = CounterGuard<TaskTracker>(tracker_)
@@ -156,6 +159,8 @@ WorkQueueGroupImpl::~WorkQueueGroupImpl() {
}
void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function<void()> fn) {
platform::RecordEvent("WorkQueue::AddTask",
platform::TracerEventType::UserDefined, 10 /*level*/);
assert(queue_idx < queues_.size());
if (queues_options_.at(queue_idx).track_task) {
fn = [
......
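For reference, the truncated lambda in AddTask above moves the task into a closure together with a CounterGuard, so the tracker's pending count drops exactly when the stored task object is destroyed. A simplified sketch of that wrapping pattern (TaskTracker and CounterGuard here are stand-ins, not the types from workqueue_utils.h):

```cpp
#include <atomic>
#include <cstdint>
#include <utility>

// Simplified stand-ins for the tracker/guard pair used by the work queue.
struct TaskTracker {
  std::atomic<std::uint64_t> num_pending{0};
  void AddCounter() { num_pending.fetch_add(1, std::memory_order_relaxed); }
  void SubCounter() { num_pending.fetch_sub(1, std::memory_order_relaxed); }
};

// RAII guard: bumps the counter on construction, drops it on destruction.
class CounterGuard {
 public:
  explicit CounterGuard(TaskTracker* tracker) : tracker_(tracker) {
    tracker_->AddCounter();
  }
  CounterGuard(CounterGuard&& other) noexcept
      : tracker_(std::exchange(other.tracker_, nullptr)) {}
  ~CounterGuard() {
    if (tracker_ != nullptr) tracker_->SubCounter();
  }

 private:
  TaskTracker* tracker_;
};

// Wrap a task so that destroying the wrapped callable (after it has run and
// been released by the queue) automatically decrements the pending count.
template <typename Fn>
auto WrapWithTracker(Fn fn, TaskTracker* tracker) {
  return [task = std::move(fn), raii = CounterGuard(tracker)]() mutable {
    task();
  };
}
```

Running the wrapped task and then letting it go out of scope brings num_pending back to its previous value, which is how the queue can wait for all tracked tasks to drain.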
@@ -18,6 +18,7 @@
#include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/platform/flags.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
PADDLE_DEFINE_EXPORTED_READONLY_bool(
free_idle_chunk, false,
@@ -47,6 +48,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
size_t unaligned_size) {
platform::RecordEvent("AutoGrowthBestFitAllocator::Allocate",
platform::TracerEventType::UserDefined, 9 /*level*/);
size_t size = AlignedSize(unaligned_size, alignment_);
VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
@@ -108,6 +111,8 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
}
void AutoGrowthBestFitAllocator::FreeImpl(phi::Allocation *allocation) {
platform::RecordEvent("AutoGrowthBestFitAllocator::Free",
platform::TracerEventType::UserDefined, 9 /*level*/);
VLOG(10) << "Free " << allocation->size()
<< " bytes, ptr = " << allocation->ptr();
std::lock_guard<SpinLock> guard(spinlock_);
......
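The Allocate path above first rounds the request up with AlignedSize(unaligned_size, alignment_) before searching for a best-fit block. A hedged sketch of what that rounding amounts to (AlignedSizeSketch is illustrative, not the helper from aligned_allocator.h):

```cpp
#include <cstddef>
#include <cstdio>

// Round a requested size up to the next multiple of the alignment.
static std::size_t AlignedSizeSketch(std::size_t size, std::size_t alignment) {
  const std::size_t remainder = size % alignment;
  return remainder == 0 ? size : size + (alignment - remainder);
}

int main() {
  // A 1000-byte request with a 256-byte alignment becomes a 1024-byte block.
  std::printf("%zu\n", AlignedSizeSketch(1000, 256));  // prints 1024
  return 0;
}
```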
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace memory {
@@ -117,6 +118,8 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
platform::RecordEvent("StreamSafeCUDAAllocator::Allocate",
platform::TracerEventType::UserDefined, 9 /*level*/);
ProcessUnfreedAllocations();
VLOG(8) << "Try allocate " << size << " bytes";
AllocationPtr underlying_allocation;
@@ -144,6 +147,8 @@ phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
}
void StreamSafeCUDAAllocator::FreeImpl(phi::Allocation* allocation) {
platform::RecordEvent("StreamSafeCUDAAllocator::Free",
platform::TracerEventType::UserDefined, 9 /*level*/);
StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
......
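AllocateImpl above calls ProcessUnfreedAllocations() before trying to allocate: frees that arrived while their recording streams were still busy appear to be parked and then retired once those streams finish. A rough sketch of that deferred-free bookkeeping (PendingFree, the stream_done predicate, and DeferredFreeList are hypothetical stand-ins, not Paddle's data structures):

```cpp
#include <deque>
#include <functional>
#include <mutex>

// A free request whose owning streams are still running is parked in a queue,
// and each later Allocate/Free call retires the entries whose streams have
// completed.
struct PendingFree {
  std::function<bool()> stream_done;     // e.g. wraps a stream/event query
  std::function<void()> release_memory;  // actually returns the block
};

class DeferredFreeList {
 public:
  void Defer(PendingFree pending) {
    std::lock_guard<std::mutex> guard(mutex_);
    pending_.push_back(std::move(pending));
  }

  // Called at the start of Allocate/Free: retire everything now safe to free.
  void ProcessUnfreed() {
    std::lock_guard<std::mutex> guard(mutex_);
    for (auto it = pending_.begin(); it != pending_.end();) {
      if (it->stream_done()) {
        it->release_memory();
        it = pending_.erase(it);
      } else {
        ++it;
      }
    }
  }

 private:
  std::mutex mutex_;
  std::deque<PendingFree> pending_;
};
```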