From 1fcaab456976ee5e72d056f72ed65884ce77d784 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 23 Feb 2022 10:01:14 +0800 Subject: [PATCH] Update record interface using part3 (#39695) * fix RecordEvent interface * modify default level to 4 * update interface use * add const default trace level * update record event interface using * update record event interface using * update record event interface using * update operator.cc * update part2 * update part1 * update part3 * fix include profiler.h header in ps server * fix include profiler.h header in ps server * fix profiler.h header * fix profiler.h header * fix merge buf * update * fix bug * fix bug --- .../grad_merge_all_reduce_op_handle.cc | 5 + paddle/fluid/memory/memcpy.cc | 119 +++++++++++++----- .../fluid/operators/controlflow/fetch_op.cc | 2 +- paddle/fluid/operators/conv_cudnn_op.cu | 2 +- paddle/fluid/operators/load_op.h | 2 +- .../operators/pscore/send_and_recv_op.cc | 2 +- .../fluid/operators/reader/buffered_reader.cc | 13 +- paddle/fluid/operators/reader/read_op.cc | 5 +- paddle/fluid/platform/device_context.cc | 4 +- 9 files changed, 111 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc index a623266719..44b9ca90fc 100644 --- a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) DECLARE_bool(sync_nccl_allreduce); @@ -47,6 +48,8 @@ GradMergeAllReduceOpHandle::GradMergeAllReduceOpHandle( #endif void GradMergeAllReduceOpHandle::RunImpl() { + platform::RecordEvent record_event( + Name(), platform::TracerEventType::Communication, 1); PADDLE_ENFORCE_GT(local_scopes_.size(), 0, platform::errors::PreconditionNotMet( "The number of local scope should be > 0, but got %zu.", @@ -96,6 +99,8 @@ FusedGradMergeAllReduceOpHandle::FusedGradMergeAllReduceOpHandle( #endif void FusedGradMergeAllReduceOpHandle::RunImpl() { + platform::RecordEvent record_event( + Name(), platform::TracerEventType::Communication, 1); PADDLE_ENFORCE_GT(local_scopes_.size(), 0, platform::errors::PreconditionNotMet( "The number of local scope should be > 0, but got %zu.", diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index a71e5fe987..166cdd0b5d 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -246,7 +246,8 @@ void Copy(platform::NPUPlace dst_place, << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("NpuMemcpyAsync:CPU->NPU"); + platform::RecordEvent record_event( + "NpuMemcpyAsync:CPU->NPU", platform::TracerEventType::UserDefined, 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE, reinterpret_cast(stream)); } else { @@ -256,7 +257,8 @@ void Copy(platform::NPUPlace dst_place, platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); static_cast(pool.Get(dst_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpySync:CPU->NPU"); + platform::RecordEvent record_event( + "NpuMemcpySync:CPU->NPU", platform::TracerEventType::UserDefined, 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE); } } @@ -275,14 +277,16 @@ void Copy(platform::CPUPlace dst_place, << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("NpuMemcpyAsync:NPU->CPU"); + platform::RecordEvent record_event( + "NpuMemcpyAsync:NPU->CPU", platform::TracerEventType::UserDefined, 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST, reinterpret_cast(stream)); } else { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); static_cast(pool.Get(src_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpySync:NPU->CPU"); + platform::RecordEvent record_event( + "NpuMemcpySync:NPU->CPU", platform::TracerEventType::UserDefined, 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST); } } @@ -300,7 +304,9 @@ void Copy(platform::NPUPlace dst_place, if (dst_place == src_place) { platform::SetNPUDeviceId(src_place.device); if (stream) { - platform::RecordEvent record_event("NpuMemcpyAsync(same_npu):NPU->NPU"); + platform::RecordEvent record_event("NpuMemcpyAsync(same_npu):NPU->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE, reinterpret_cast(stream)); } else { @@ -308,7 +314,9 @@ void Copy(platform::NPUPlace dst_place, platform::DeviceContextPool::Instance(); static_cast(pool.Get(dst_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpySync(same_npu):NPU->NPU"); + platform::RecordEvent record_event("NpuMemcpySync(same_npu):NPU->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE); } } else { @@ -318,7 +326,9 @@ void Copy(platform::NPUPlace dst_place, } if (stream) { // TODO(zhiqiu): support peer access? - platform::RecordEvent record_event("NpuMemcpyPeerAsync:NPU->NPU"); + platform::RecordEvent record_event("NpuMemcpyPeerAsync:NPU->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE, reinterpret_cast(stream)); } else { @@ -326,7 +336,9 @@ void Copy(platform::NPUPlace dst_place, platform::DeviceContextPool::Instance(); static_cast(pool.Get(dst_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpyPeerSync:NPU->NPU"); + platform::RecordEvent record_event("NpuMemcpyPeerSync:NPU->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE); } } @@ -374,14 +386,18 @@ void Copy( << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("NpuMemcpyAsync:NPU->NPUPinned"); + platform::RecordEvent record_event("NpuMemcpyAsync:NPU->NPUPinned", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST, reinterpret_cast(stream)); } else { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); static_cast(pool.Get(src_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpySync:NPU->NPUPinned"); + platform::RecordEvent record_event("NpuMemcpySync:NPU->NPUPinned", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST); } } @@ -398,7 +414,9 @@ void Copy( << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("NpuMemcpyAsync:NPUPinned->NPU"); + platform::RecordEvent record_event("NpuMemcpyAsync:NPUPinned->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE, reinterpret_cast(stream)); } else { @@ -408,7 +426,9 @@ void Copy( platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); static_cast(pool.Get(dst_place))->Wait(); - platform::RecordEvent record_event("NpuMemcpySync:NPUPinned->NPU"); + platform::RecordEvent record_event("NpuMemcpySync:NPUPinned->NPU", + platform::TracerEventType::UserDefined, + 1); platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE); } } @@ -596,7 +616,8 @@ void Copy( VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by stream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CPU"); + platform::RecordEvent record_event( + "GpuMemcpyAsync:GPU->CPU", platform::TracerEventType::UserDefined, 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToHost, reinterpret_cast(stream)); @@ -605,7 +626,8 @@ void Copy( reinterpret_cast(stream)); #endif } else { - platform::RecordEvent record_event("GpuMemcpySync:GPU->CPU"); + platform::RecordEvent record_event( + "GpuMemcpySync:GPU->CPU", platform::TracerEventType::UserDefined, 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToHost); #else @@ -628,7 +650,8 @@ void Copy( VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("GpuMemcpyAsync:CPU->GPU"); + platform::RecordEvent record_event( + "GpuMemcpyAsync:CPU->GPU", platform::TracerEventType::UserDefined, 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpyAsync(dst, src, num, hipMemcpyHostToDevice, reinterpret_cast(stream)); @@ -637,7 +660,8 @@ void Copy( reinterpret_cast(stream)); #endif } else { - platform::RecordEvent record_event("GpuMemcpySync:CPU->GPU"); + platform::RecordEvent record_event( + "GpuMemcpySync:CPU->GPU", platform::TracerEventType::UserDefined, 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpySync(dst, src, num, hipMemcpyHostToDevice); #else @@ -661,7 +685,9 @@ void Copy( if (dst_place == src_place) { platform::SetDeviceId(src_place.device); if (stream) { - platform::RecordEvent record_event("GpuMemcpyAsync(same_gpu):GPU->GPU"); + platform::RecordEvent record_event("GpuMemcpyAsync(same_gpu):GPU->GPU", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToDevice, reinterpret_cast(stream)); @@ -670,7 +696,9 @@ void Copy( reinterpret_cast(stream)); #endif } else { - platform::RecordEvent record_event("GpuMemcpySync(same_gpu):GPU->GPU"); + platform::RecordEvent record_event("GpuMemcpySync(same_gpu):GPU->GPU", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToDevice); #else @@ -679,11 +707,15 @@ void Copy( } } else { if (stream) { - platform::RecordEvent record_event("GpuMemcpyPeerAsync:GPU->GPU"); + platform::RecordEvent record_event("GpuMemcpyPeerAsync:GPU->GPU", + platform::TracerEventType::UserDefined, + 1); platform::GpuMemcpyPeerAsync(dst, dst_place.device, src, src_place.device, num, reinterpret_cast(stream)); } else { - platform::RecordEvent record_event("GpuMemcpyPeerSync:GPU->GPU"); + platform::RecordEvent record_event("GpuMemcpyPeerSync:GPU->GPU", + platform::TracerEventType::UserDefined, + 1); platform::GpuMemcpyPeerSync(dst, dst_place.device, src, src_place.device, num); } @@ -729,7 +761,9 @@ void Copy( VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CUDAPinned"); + platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CUDAPinned", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToHost, reinterpret_cast(stream)); @@ -738,7 +772,9 @@ void Copy( reinterpret_cast(stream)); #endif } else { - platform::RecordEvent record_event("GpuMemcpySync:GPU->CUDAPinned"); + platform::RecordEvent record_event("GpuMemcpySync:GPU->CUDAPinned", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToHost); #else @@ -758,7 +794,9 @@ void Copy( VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by thream(" << stream << ")"; if (stream) { - platform::RecordEvent record_event("GpuMemcpyAsync:CUDAPinned->GPU"); + platform::RecordEvent record_event("GpuMemcpyAsync:CUDAPinned->GPU", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpyAsync(dst, src, num, hipMemcpyHostToDevice, reinterpret_cast(stream)); @@ -767,7 +805,9 @@ void Copy( reinterpret_cast(stream)); #endif } else { - platform::RecordEvent record_event("GpuMemcpySync:CUDAPinned->GPU"); + platform::RecordEvent record_event("GpuMemcpySync:CUDAPinned->GPU", + platform::TracerEventType::UserDefined, + 1); #ifdef PADDLE_WITH_HIP platform::GpuMemcpySync(dst, src, num, hipMemcpyHostToDevice); #else @@ -927,7 +967,9 @@ void Copy(platform::CPUPlace dst_place, if (stream) { VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by mlu stream(" << stream << ")"; - platform::RecordEvent record_event("MLUMemcpyD2HAsync:MLU->CPU"); + platform::RecordEvent record_event("MLUMemcpyD2HAsync:MLU->CPU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyD2HAsync(dst, src, num, reinterpret_cast(stream)); } else { @@ -936,7 +978,8 @@ void Copy(platform::CPUPlace dst_place, VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place; - platform::RecordEvent record_event("MLUMemcpyD2HSync:MLU->CPU"); + platform::RecordEvent record_event( + "MLUMemcpyD2HSync:MLU->CPU", platform::TracerEventType::UserDefined, 1); platform::MLUMemcpyD2HSync(dst, src, num); } } @@ -953,7 +996,9 @@ void Copy(platform::MLUPlace dst_place, if (stream) { VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by mlu stream(" << stream << ")"; - platform::RecordEvent record_event("MLUMemcpyH2DAsync:CPU->MLU"); + platform::RecordEvent record_event("MLUMemcpyH2DAsync:CPU->MLU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyH2DAsync(dst, src, num, reinterpret_cast(stream)); } else { @@ -962,7 +1007,8 @@ void Copy(platform::MLUPlace dst_place, VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place; - platform::RecordEvent record_event("MLUMemcpyH2DSync:CPU->MLU"); + platform::RecordEvent record_event( + "MLUMemcpyH2DSync:CPU->MLU", platform::TracerEventType::UserDefined, 1); platform::MLUMemcpyH2DSync(dst, src, num); } } @@ -980,8 +1026,9 @@ void Copy(platform::MLUPlace dst_place, if (stream) { VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by mlu stream(" << stream << ")"; - platform::RecordEvent record_event( - "MLUMemcpyD2DAsync(same_mlu):MLU->MLU"); + platform::RecordEvent record_event("MLUMemcpyD2DAsync(same_mlu):MLU->MLU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyD2DAsync(dst, src, num, reinterpret_cast(stream)); } else { @@ -991,20 +1038,26 @@ void Copy(platform::MLUPlace dst_place, VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place; - platform::RecordEvent record_event("MLUMemcpyD2DSync(same_mlu):MLU->MLU"); + platform::RecordEvent record_event("MLUMemcpyD2DSync(same_mlu):MLU->MLU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyD2DSync(dst, src, num); } } else { if (stream) { VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place << " by mlu stream(" << stream << ")"; - platform::RecordEvent record_event("MLUMemcpyPeerAsync:MLU->MLU"); + platform::RecordEvent record_event("MLUMemcpyPeerAsync:MLU->MLU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyPeerAsync(dst, dst_place.device, src, src_place.device, num, reinterpret_cast(stream)); } else { VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place << " to " << dst_place; - platform::RecordEvent record_event("MLUMemcpyPeerSync:MLU->MLU"); + platform::RecordEvent record_event("MLUMemcpyPeerSync:MLU->MLU", + platform::TracerEventType::UserDefined, + 1); platform::MLUMemcpyPeerSync(dst, dst_place.device, src, src_place.device, num); } diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index ed4995d4fb..de3d8bd996 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index bda5ac42da..dff60afd74 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -28,7 +28,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/padding.h" #include "paddle/fluid/platform/cudnn_workspace_helper.h" #include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" DECLARE_bool(cudnn_deterministic); DECLARE_uint64(conv_workspace_size_limit); diff --git a/paddle/fluid/operators/load_op.h b/paddle/fluid/operators/load_op.h index 521a35646c..7a161fb9dd 100644 --- a/paddle/fluid/operators/load_op.h +++ b/paddle/fluid/operators/load_op.h @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/pscore/send_and_recv_op.cc b/paddle/fluid/operators/pscore/send_and_recv_op.cc index 980351e12a..c5971632b0 100644 --- a/paddle/fluid/operators/pscore/send_and_recv_op.cc +++ b/paddle/fluid/operators/pscore/send_and_recv_op.cc @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 6393ff2135..21c23a7f60 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/reader/buffered_reader.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace operators { @@ -115,7 +116,9 @@ void BufferedReader::ReadAsync(size_t i) { platform::CUDAPinnedPlace cuda_pinned_place; std::vector cuda_pinned_ptrs; cuda_pinned_ptrs.reserve(cpu.size()); - platform::RecordEvent record_event("BufferedReader:MemoryCopy"); + platform::RecordEvent record_event( + "BufferedReader:MemoryCopy", platform::TracerEventType::UserDefined, + 1); // NODE(chenweihang): When we use CUDAPinned Memory, we need call // cudaHostAlloc, that is a CUDA API, calling CUDA API need load // cuda lib into device, it will cost hundreds of MB of GPU memory. @@ -170,7 +173,9 @@ void BufferedReader::ReadAsync(size_t i) { cudaStreamWaitEvent(stream_.get(), events_[i].get(), 0)); #endif - platform::RecordEvent record_event("BufferedReader:MemoryCopy"); + platform::RecordEvent record_event( + "BufferedReader:MemoryCopy", platform::TracerEventType::UserDefined, + 1); for (size_t i = 0; i < cpu.size(); ++i) { auto cpu_place = cpu[i].place(); auto cpu_ptr = cpu[i].data(); @@ -229,7 +234,9 @@ void BufferedReader::ReadAsync(size_t i) { platform::NPUEventRecord(events_[i].get(), compute_stream_); platform::NPUStreamWaitEvent(stream_.get(), events_[i].get()); - platform::RecordEvent record_event("BufferedReader:MemoryCopy"); + platform::RecordEvent record_event("BufferedReader:MemoryCopy", + platform::TracerEventType::UserDefined, + 1); for (size_t i = 0; i < cpu.size(); ++i) { auto cpu_place = cpu[i].place(); auto cpu_ptr = cpu[i].data(); diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 73bc67287c..d406640bff 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -15,7 +15,7 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" -#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace operators { @@ -106,7 +106,8 @@ class ReadOp : public framework::OperatorBase { std::vector ins; // For profiling - platform::RecordEvent record_event(Type()); + platform::RecordEvent record_event( + Type().c_str(), platform::TracerEventType::UserDefined, 1); reader->ReadNext(&ins); if (ins.empty()) { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index e5e369efd6..4282ec2062 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -32,6 +32,7 @@ limitations under the License. */ #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { namespace memory { @@ -322,7 +323,8 @@ NPUDeviceContext::~NPUDeviceContext() { } void NPUDeviceContext::Wait() const { - platform::RecordEvent record_event("NPUDeviceContext/wait"); + platform::RecordEvent record_event("NPUDeviceContext/wait", + platform::TracerEventType::UserDefined, 2); VLOG(4) << "NPU context(" << this << ") Wait"; stream_->Wait(); } -- GitLab