diff --git a/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h index a4a913cdff22db18e467670be9644ed90dca542e..21b2927b52eab653e20611e135a8c0f905057fcf 100644 --- a/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h +++ b/paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h @@ -44,7 +44,6 @@ class ThreadDataRegistry { template ::value>> void SetCurrentThreadData(const T& val) { - std::lock_guard lock(lock_); CurrentThreadData() = val; } diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc index 07c5298c2f22377e277939e11af6fa6c142f24bc..596ffb9bfc0c4f624aeaf5874bdf18563d96d14c 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc @@ -8,6 +8,7 @@ #include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace framework { @@ -61,6 +62,8 @@ class WorkQueueImpl : public WorkQueue { } void AddTask(std::function fn) override { + platform::RecordEvent record_event("WorkQueue::AddTask", + platform::TracerEventType::UserDefined, 10 /*level*/); /* named so it lives until function exit, not just this statement */ if (tracker_ != nullptr) { fn = [ task = std::move(fn), raii = CounterGuard(tracker_) @@ -156,6 +159,8 @@ WorkQueueGroupImpl::~WorkQueueGroupImpl() { } void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function fn) { + platform::RecordEvent record_event("WorkQueue::AddTask", + platform::TracerEventType::UserDefined, 10 /*level*/); /* named so it lives until function exit, not just this statement */ assert(queue_idx < queues_.size()); if (queues_options_.at(queue_idx).track_task) { fn = [ diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc 
b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc index d86e5e35c08c0ef46ce86c0f372fc90f8df1811b..f5e4941d787097b5e349c0b668d6c95fad137873 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc @@ -18,6 +18,7 @@ #include // NOLINT #include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/platform/flags.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" PADDLE_DEFINE_EXPORTED_READONLY_bool( free_idle_chunk, false, @@ -47,6 +48,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator( phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl( size_t unaligned_size) { + platform::RecordEvent record_event("AutoGrowthBestFitAllocator::Allocate", + platform::TracerEventType::UserDefined, 9 /*level*/); /* named so it lives until function exit, not just this statement */ size_t size = AlignedSize(unaligned_size, alignment_); VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size; @@ -108,6 +111,8 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl( } void AutoGrowthBestFitAllocator::FreeImpl(phi::Allocation *allocation) { + platform::RecordEvent record_event("AutoGrowthBestFitAllocator::Free", + platform::TracerEventType::UserDefined, 9 /*level*/); /* named so it lives until function exit, not just this statement */ VLOG(10) << "Free " << allocation->size() << " bytes, ptr = " << allocation->ptr(); std::lock_guard guard(spinlock_); diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc index e7b86d6ec19c06d4ee9086590763f1afe23f99a9..8627e3e6f8811e162ce3014c01145f331a03ee4b 100644 --- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace memory { @@ -117,6 +118,8 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() { bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; } phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { + platform::RecordEvent record_event("StreamSafeCUDAAllocator::Allocate", + platform::TracerEventType::UserDefined, 9 /*level*/); /* named so it lives until function exit, not just this statement */ ProcessUnfreedAllocations(); VLOG(8) << "Try allocate " << size << " bytes"; AllocationPtr underlying_allocation; @@ -144,6 +147,8 @@ phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { } void StreamSafeCUDAAllocator::FreeImpl(phi::Allocation* allocation) { + platform::RecordEvent record_event("StreamSafeCUDAAllocator::Free", + platform::TracerEventType::UserDefined, 9 /*level*/); /* named so it lives until function exit, not just this statement */ StreamSafeCUDAAllocation* stream_safe_cuda_allocation = dynamic_cast(allocation); PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,