未验证 提交 7ecefec3 编写于 作者: L liutiexing 提交者: GitHub

Profile Executor (#39641)

* add align for WorkQueue

* add spinlock

* merge develop

* merge

* Add EventsWaiter

* Revert "Add EventsWaiter"

This reverts commit e206173aa9be7401b83a53581627bfaf557c8fb2.

* add log for Executor

* Profile Allocators

* Profile Allocators

* adjust interface

* remove lock for set

* fix

Co-authored-by: liutiexing <liutiexing@google.com>
上级 bc99a76c
...@@ -44,7 +44,6 @@ class ThreadDataRegistry { ...@@ -44,7 +44,6 @@ class ThreadDataRegistry {
template <typename Alias = T, template <typename Alias = T,
typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>> typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>>
void SetCurrentThreadData(const T& val) { void SetCurrentThreadData(const T& val) {
std::lock_guard<std::mutex> lock(lock_);
CurrentThreadData() = val; CurrentThreadData() = val;
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h" #include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h"
#include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -61,6 +62,8 @@ class WorkQueueImpl : public WorkQueue { ...@@ -61,6 +62,8 @@ class WorkQueueImpl : public WorkQueue {
} }
void AddTask(std::function<void()> fn) override { void AddTask(std::function<void()> fn) override {
platform::RecordEvent("WorkQueue::AddTask",
platform::TracerEventType::UserDefined, 10 /*level*/);
if (tracker_ != nullptr) { if (tracker_ != nullptr) {
fn = [ fn = [
task = std::move(fn), raii = CounterGuard<TaskTracker>(tracker_) task = std::move(fn), raii = CounterGuard<TaskTracker>(tracker_)
...@@ -156,6 +159,8 @@ WorkQueueGroupImpl::~WorkQueueGroupImpl() { ...@@ -156,6 +159,8 @@ WorkQueueGroupImpl::~WorkQueueGroupImpl() {
} }
void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function<void()> fn) { void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function<void()> fn) {
platform::RecordEvent("WorkQueue::AddTask",
platform::TracerEventType::UserDefined, 10 /*level*/);
assert(queue_idx < queues_.size()); assert(queue_idx < queues_.size());
if (queues_options_.at(queue_idx).track_task) { if (queues_options_.at(queue_idx).track_task) {
fn = [ fn = [
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/flags.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
PADDLE_DEFINE_EXPORTED_READONLY_bool( PADDLE_DEFINE_EXPORTED_READONLY_bool(
free_idle_chunk, false, free_idle_chunk, false,
...@@ -47,6 +48,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator( ...@@ -47,6 +48,8 @@ AutoGrowthBestFitAllocator::AutoGrowthBestFitAllocator(
phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl( phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
size_t unaligned_size) { size_t unaligned_size) {
platform::RecordEvent("AutoGrowthBestFitAllocator::Allocate",
platform::TracerEventType::UserDefined, 9 /*level*/);
size_t size = AlignedSize(unaligned_size, alignment_); size_t size = AlignedSize(unaligned_size, alignment_);
VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size; VLOG(10) << "Allocate " << unaligned_size << " bytes, aligned to " << size;
...@@ -108,6 +111,8 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl( ...@@ -108,6 +111,8 @@ phi::Allocation *AutoGrowthBestFitAllocator::AllocateImpl(
} }
void AutoGrowthBestFitAllocator::FreeImpl(phi::Allocation *allocation) { void AutoGrowthBestFitAllocator::FreeImpl(phi::Allocation *allocation) {
platform::RecordEvent("AutoGrowthBestFitAllocator::Free",
platform::TracerEventType::UserDefined, 9 /*level*/);
VLOG(10) << "Free " << allocation->size() VLOG(10) << "Free " << allocation->size()
<< " bytes, ptr = " << allocation->ptr(); << " bytes, ptr = " << allocation->ptr();
std::lock_guard<SpinLock> guard(spinlock_); std::lock_guard<SpinLock> guard(spinlock_);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h" #include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
...@@ -117,6 +118,8 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() { ...@@ -117,6 +118,8 @@ StreamSafeCUDAAllocator::~StreamSafeCUDAAllocator() {
bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; } bool StreamSafeCUDAAllocator::IsAllocThreadSafe() const { return true; }
phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
platform::RecordEvent("StreamSafeCUDAAllocator::Allocate",
platform::TracerEventType::UserDefined, 9 /*level*/);
ProcessUnfreedAllocations(); ProcessUnfreedAllocations();
VLOG(8) << "Try allocate " << size << " bytes"; VLOG(8) << "Try allocate " << size << " bytes";
AllocationPtr underlying_allocation; AllocationPtr underlying_allocation;
...@@ -144,6 +147,8 @@ phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) { ...@@ -144,6 +147,8 @@ phi::Allocation* StreamSafeCUDAAllocator::AllocateImpl(size_t size) {
} }
void StreamSafeCUDAAllocator::FreeImpl(phi::Allocation* allocation) { void StreamSafeCUDAAllocator::FreeImpl(phi::Allocation* allocation) {
platform::RecordEvent("StreamSafeCUDAAllocator::Free",
platform::TracerEventType::UserDefined, 9 /*level*/);
StreamSafeCUDAAllocation* stream_safe_cuda_allocation = StreamSafeCUDAAllocation* stream_safe_cuda_allocation =
dynamic_cast<StreamSafeCUDAAllocation*>(allocation); dynamic_cast<StreamSafeCUDAAllocation*>(allocation);
PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation, PADDLE_ENFORCE_NOT_NULL(stream_safe_cuda_allocation,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册