From 7ecefec303aaf125f6b83d5f35fdb421bf8147f0 Mon Sep 17 00:00:00 2001
From: liutiexing <74819124+liutiexing@users.noreply.github.com>
Date: Mon, 28 Feb 2022 15:19:57 +0800
Subject: [PATCH] Profile Executor (#39641)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* add log for Executor

* Profile Allocators

* Profile Allocators

* adjust interface

* remove lock for set

* fix

Co-authored-by: liutiexing
---
 .../framework/new_executor/workqueue/thread_data_registry.h  | 1 -
 paddle/fluid/framework/new_executor/workqueue/workqueue.cc   | 5 +++++
 .../memory/allocation/auto_growth_best_fit_allocator.cc      | 5 +++++
 paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc | 5 +++++
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h
index a4a913cdff2..21b2927b52e 100644
--- a/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h
+++ b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h
@@ -44,7 +44,6 @@ class ThreadDataRegistry {
   template <typename Alias = T,
             typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>>
   void SetCurrentThreadData(const T& val) {
-    std::lock_guard<std::mutex> lock(lock_);
     CurrentThreadData() = val;
   }
 
diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc
index 07c5298c2f2..596ffb9bfc0 100644
--- a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc
+++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc
@@ -8,6 +8,7 @@
 #include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h"
 #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 namespace paddle {
 namespace framework {
@@ -61,6 +62,8 @@ class WorkQueueImpl : public WorkQueue {
   }
 
   void AddTask(std::function<void()> fn) override {
+    platform::RecordEvent("WorkQueue::AddTask",
+                          platform::TracerEventType::UserDefined, 10 /*level*/);
     if (tracker_ != nullptr) {
       fn = [
         task = std::move(fn), raii = CounterGuard<TaskTracker>(tracker_)
@@ -156,6 +159,8 @@ WorkQueueGroupImpl::~WorkQueueGroupImpl() {
 }
 
 void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function<void()> fn) {
+  platform::RecordEvent("WorkQueue::AddTask",
+                        platform::TracerEventType::UserDefined, 10 /*level*/);
   assert(queue_idx < queues_.size());
   if (queues_options_.at(queue_idx).track_task) {
     fn = [
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
index d86e5e35c08..f5e4941d787 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc
@@ -18,6 +18,7 @@
 #include <mutex>  // NOLINT
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/platform/flags.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 PADDLE_DEFINE_EXPORTED_READONLY_bool(
     free_idle_chunk, false,
@@ -47,6 +48,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
 
 phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
     size_t unaligned_size) {
+  platform::RecordEvent("AutoGrowthBestFitAllocator::Allocate",
+                        platform::TracerEventType::UserDefined, 9 /*level*/);
   size_t size = AlignedSize(unaligned_size, alignment_);
   VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
 
@@ -108,6 +111,8 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
 }
 
 void AutoGrowthBestFitAllocator::FreeImpl(phi::Allocation *allocation) {
+  platform::RecordEvent("AutoGrowthBestFitAllocator::Free",
+                        platform::TracerEventType::UserDefined, 9 /*level*/);
   VLOG(10) << "Free " << allocation->size()
            << " bytes, ptr = " << allocation->ptr();
   std::lock_guard<SpinLock> guard(spinlock_);
diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
index e7b86d6ec19..8627e3e6f88 100644
--- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
+++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 
 namespace paddle {
 namespace memory {
@@ -117,6 +118,8 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
 bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
 
 phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
+  platform::RecordEvent("StreamSafeCUDAAllocator::Allocate",
+                        platform::TracerEventType::UserDefined, 9 /*level*/);
   ProcessUnfreedAllocations();
   VLOG(8) << "Try allocate " << size << " bytes";
   AllocationPtr underlying_allocation;
@@ -144,6 +147,8 @@ phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
 }
 
 void StreamSafeCUDAAllocator::FreeImpl(phi::Allocation* allocation) {
+  platform::RecordEvent("StreamSafeCUDAAllocator::Free",
+                        platform::TracerEventType::UserDefined, 9 /*level*/);
   StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
       dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
   PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
-- 
GitLab
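
Note (illustration only, below the mail signature and therefore ignored when applying the patch): the RecordEvent calls added above attach named, levelled profiler events to the hot entry points touched by this patch (WorkQueue::AddTask and the allocators' AllocateImpl/FreeImpl). The standalone sketch below shows the general scoped-event idea such instrumentation builds on; ScopedTraceEvent and AddTaskLikeHotPath are hypothetical stand-ins for illustration and are not Paddle's platform::RecordEvent implementation.

// Minimal sketch (hypothetical, illustration only): an object marks the start
// of a region when constructed and reports it when destroyed, so a single
// statement at the top of a function accounts for the whole call.
#include <chrono>
#include <cstdio>
#include <string>
#include <utility>

class ScopedTraceEvent {
 public:
  ScopedTraceEvent(std::string name, int level)
      : name_(std::move(name)),
        level_(level),
        start_(std::chrono::steady_clock::now()) {}

  ~ScopedTraceEvent() {
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::printf("[level %d] %s took %lld us\n", level_, name_.c_str(),
                static_cast<long long>(us));
  }

 private:
  std::string name_;
  int level_;
  std::chrono::steady_clock::time_point start_;
};

// Hypothetical caller showing the usage pattern of the instrumentation above.
void AddTaskLikeHotPath() {
  ScopedTraceEvent event("WorkQueue::AddTask", 10 /*level*/);
  // ... enqueue the task here; the event covers everything in this scope ...
}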