未验证 提交 1fcaab45 编写于 作者: C chenjian 提交者: GitHub

Update record interface using part3 (#39695)

* fix RecordEvent interface

* modify default level to 4

* update interface use

* add const default trace level

* update record event interface using

* update record event interface using

* update record event interface using

* update operator.cc

* update part2

* update part1

* update part3

* fix include profiler.h header in ps server

* fix include profiler.h header in ps server

* fix profiler.h header

* fix profiler.h header

* fix merge buf

* update

* fix bug

* fix bug
上级 94243789
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
DECLARE_bool(sync_nccl_allreduce);
......@@ -47,6 +48,8 @@ GradMergeAllReduceOpHandle::GradMergeAllReduceOpHandle(
#endif
void GradMergeAllReduceOpHandle::RunImpl() {
platform::RecordEvent record_event(
Name(), platform::TracerEventType::Communication, 1);
PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
platform::errors::PreconditionNotMet(
"The number of local scope should be > 0, but got %zu.",
......@@ -96,6 +99,8 @@ FusedGradMergeAllReduceOpHandle::FusedGradMergeAllReduceOpHandle(
#endif
void FusedGradMergeAllReduceOpHandle::RunImpl() {
platform::RecordEvent record_event(
Name(), platform::TracerEventType::Communication, 1);
PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
platform::errors::PreconditionNotMet(
"The number of local scope should be > 0, but got %zu.",
......
......@@ -246,7 +246,8 @@ void Copy<platform::NPUPlace, platform::CPUPlace>(platform::NPUPlace dst_place,
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:CPU->NPU");
platform::RecordEvent record_event(
"NpuMemcpyAsync:CPU->NPU", platform::TracerEventType::UserDefined, 1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE,
reinterpret_cast<aclrtStream>(stream));
} else {
......@@ -256,7 +257,8 @@ void Copy<platform::NPUPlace, platform::CPUPlace>(platform::NPUPlace dst_place,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(dst_place))->Wait();
platform::RecordEvent record_event("NpuMemcpySync:CPU->NPU");
platform::RecordEvent record_event(
"NpuMemcpySync:CPU->NPU", platform::TracerEventType::UserDefined, 1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE);
}
}
......@@ -275,14 +277,16 @@ void Copy<platform::CPUPlace, platform::NPUPlace>(platform::CPUPlace dst_place,
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:NPU->CPU");
platform::RecordEvent record_event(
"NpuMemcpyAsync:NPU->CPU", platform::TracerEventType::UserDefined, 1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST,
reinterpret_cast<aclrtStream>(stream));
} else {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(src_place))->Wait();
platform::RecordEvent record_event("NpuMemcpySync:NPU->CPU");
platform::RecordEvent record_event(
"NpuMemcpySync:NPU->CPU", platform::TracerEventType::UserDefined, 1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST);
}
}
......@@ -300,7 +304,9 @@ void Copy<platform::NPUPlace, platform::NPUPlace>(platform::NPUPlace dst_place,
if (dst_place == src_place) {
platform::SetNPUDeviceId(src_place.device);
if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync(same_npu):NPU->NPU");
platform::RecordEvent record_event("NpuMemcpyAsync(same_npu):NPU->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE,
reinterpret_cast<aclrtStream>(stream));
} else {
......@@ -308,7 +314,9 @@ void Copy<platform::NPUPlace, platform::NPUPlace>(platform::NPUPlace dst_place,
platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(dst_place))->Wait();
platform::RecordEvent record_event("NpuMemcpySync(same_npu):NPU->NPU");
platform::RecordEvent record_event("NpuMemcpySync(same_npu):NPU->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE);
}
} else {
......@@ -318,7 +326,9 @@ void Copy<platform::NPUPlace, platform::NPUPlace>(platform::NPUPlace dst_place,
}
if (stream) {
// TODO(zhiqiu): support peer access?
platform::RecordEvent record_event("NpuMemcpyPeerAsync:NPU->NPU");
platform::RecordEvent record_event("NpuMemcpyPeerAsync:NPU->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE,
reinterpret_cast<aclrtStream>(stream));
} else {
......@@ -326,7 +336,9 @@ void Copy<platform::NPUPlace, platform::NPUPlace>(platform::NPUPlace dst_place,
platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(dst_place))->Wait();
platform::RecordEvent record_event("NpuMemcpyPeerSync:NPU->NPU");
platform::RecordEvent record_event("NpuMemcpyPeerSync:NPU->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_DEVICE);
}
}
......@@ -374,14 +386,18 @@ void Copy<platform::NPUPinnedPlace, platform::NPUPlace>(
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:NPU->NPUPinned");
platform::RecordEvent record_event("NpuMemcpyAsync:NPU->NPUPinned",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST,
reinterpret_cast<aclrtStream>(stream));
} else {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(src_place))->Wait();
platform::RecordEvent record_event("NpuMemcpySync:NPU->NPUPinned");
platform::RecordEvent record_event("NpuMemcpySync:NPU->NPUPinned",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_DEVICE_TO_HOST);
}
}
......@@ -398,7 +414,9 @@ void Copy<platform::NPUPlace, platform::NPUPinnedPlace>(
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("NpuMemcpyAsync:NPUPinned->NPU");
platform::RecordEvent record_event("NpuMemcpyAsync:NPUPinned->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpyAsync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE,
reinterpret_cast<aclrtStream>(stream));
} else {
......@@ -408,7 +426,9 @@ void Copy<platform::NPUPlace, platform::NPUPinnedPlace>(
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
static_cast<platform::NPUDeviceContext*>(pool.Get(dst_place))->Wait();
platform::RecordEvent record_event("NpuMemcpySync:NPUPinned->NPU");
platform::RecordEvent record_event("NpuMemcpySync:NPUPinned->NPU",
platform::TracerEventType::UserDefined,
1);
platform::NPUMemcpySync(dst, src, num, ACL_MEMCPY_HOST_TO_DEVICE);
}
}
......@@ -596,7 +616,8 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by stream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CPU");
platform::RecordEvent record_event(
"GpuMemcpyAsync:GPU->CPU", platform::TracerEventType::UserDefined, 1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToHost,
reinterpret_cast<gpuStream_t>(stream));
......@@ -605,7 +626,8 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
reinterpret_cast<gpuStream_t>(stream));
#endif
} else {
platform::RecordEvent record_event("GpuMemcpySync:GPU->CPU");
platform::RecordEvent record_event(
"GpuMemcpySync:GPU->CPU", platform::TracerEventType::UserDefined, 1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToHost);
#else
......@@ -628,7 +650,8 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("GpuMemcpyAsync:CPU->GPU");
platform::RecordEvent record_event(
"GpuMemcpyAsync:CPU->GPU", platform::TracerEventType::UserDefined, 1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpyAsync(dst, src, num, hipMemcpyHostToDevice,
reinterpret_cast<gpuStream_t>(stream));
......@@ -637,7 +660,8 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
reinterpret_cast<gpuStream_t>(stream));
#endif
} else {
platform::RecordEvent record_event("GpuMemcpySync:CPU->GPU");
platform::RecordEvent record_event(
"GpuMemcpySync:CPU->GPU", platform::TracerEventType::UserDefined, 1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpySync(dst, src, num, hipMemcpyHostToDevice);
#else
......@@ -661,7 +685,9 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>(
if (dst_place == src_place) {
platform::SetDeviceId(src_place.device);
if (stream) {
platform::RecordEvent record_event("GpuMemcpyAsync(same_gpu):GPU->GPU");
platform::RecordEvent record_event("GpuMemcpyAsync(same_gpu):GPU->GPU",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToDevice,
reinterpret_cast<gpuStream_t>(stream));
......@@ -670,7 +696,9 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>(
reinterpret_cast<gpuStream_t>(stream));
#endif
} else {
platform::RecordEvent record_event("GpuMemcpySync(same_gpu):GPU->GPU");
platform::RecordEvent record_event("GpuMemcpySync(same_gpu):GPU->GPU",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToDevice);
#else
......@@ -679,11 +707,15 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>(
}
} else {
if (stream) {
platform::RecordEvent record_event("GpuMemcpyPeerAsync:GPU->GPU");
platform::RecordEvent record_event("GpuMemcpyPeerAsync:GPU->GPU",
platform::TracerEventType::UserDefined,
1);
platform::GpuMemcpyPeerAsync(dst, dst_place.device, src, src_place.device,
num, reinterpret_cast<gpuStream_t>(stream));
} else {
platform::RecordEvent record_event("GpuMemcpyPeerSync:GPU->GPU");
platform::RecordEvent record_event("GpuMemcpyPeerSync:GPU->GPU",
platform::TracerEventType::UserDefined,
1);
platform::GpuMemcpyPeerSync(dst, dst_place.device, src, src_place.device,
num);
}
......@@ -729,7 +761,9 @@ void Copy<platform::CUDAPinnedPlace, platform::CUDAPlace>(
VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CUDAPinned");
platform::RecordEvent record_event("GpuMemcpyAsync:GPU->CUDAPinned",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpyAsync(dst, src, num, hipMemcpyDeviceToHost,
reinterpret_cast<gpuStream_t>(stream));
......@@ -738,7 +772,9 @@ void Copy<platform::CUDAPinnedPlace, platform::CUDAPlace>(
reinterpret_cast<gpuStream_t>(stream));
#endif
} else {
platform::RecordEvent record_event("GpuMemcpySync:GPU->CUDAPinned");
platform::RecordEvent record_event("GpuMemcpySync:GPU->CUDAPinned",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpySync(dst, src, num, hipMemcpyDeviceToHost);
#else
......@@ -758,7 +794,9 @@ void Copy<platform::CUDAPlace, platform::CUDAPinnedPlace>(
VLOG(4) << "memory::Copy " << num << " Bytes from " << src_place << " to "
<< dst_place << " by thream(" << stream << ")";
if (stream) {
platform::RecordEvent record_event("GpuMemcpyAsync:CUDAPinned->GPU");
platform::RecordEvent record_event("GpuMemcpyAsync:CUDAPinned->GPU",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpyAsync(dst, src, num, hipMemcpyHostToDevice,
reinterpret_cast<gpuStream_t>(stream));
......@@ -767,7 +805,9 @@ void Copy<platform::CUDAPlace, platform::CUDAPinnedPlace>(
reinterpret_cast<gpuStream_t>(stream));
#endif
} else {
platform::RecordEvent record_event("GpuMemcpySync:CUDAPinned->GPU");
platform::RecordEvent record_event("GpuMemcpySync:CUDAPinned->GPU",
platform::TracerEventType::UserDefined,
1);
#ifdef PADDLE_WITH_HIP
platform::GpuMemcpySync(dst, src, num, hipMemcpyHostToDevice);
#else
......@@ -927,7 +967,9 @@ void Copy<platform::CPUPlace, platform::MLUPlace>(platform::CPUPlace dst_place,
if (stream) {
VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place << " by mlu stream(" << stream << ")";
platform::RecordEvent record_event("MLUMemcpyD2HAsync:MLU->CPU");
platform::RecordEvent record_event("MLUMemcpyD2HAsync:MLU->CPU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyD2HAsync(dst, src, num,
reinterpret_cast<mluStream>(stream));
} else {
......@@ -936,7 +978,8 @@ void Copy<platform::CPUPlace, platform::MLUPlace>(platform::CPUPlace dst_place,
VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place;
platform::RecordEvent record_event("MLUMemcpyD2HSync:MLU->CPU");
platform::RecordEvent record_event(
"MLUMemcpyD2HSync:MLU->CPU", platform::TracerEventType::UserDefined, 1);
platform::MLUMemcpyD2HSync(dst, src, num);
}
}
......@@ -953,7 +996,9 @@ void Copy<platform::MLUPlace, platform::CPUPlace>(platform::MLUPlace dst_place,
if (stream) {
VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place << " by mlu stream(" << stream << ")";
platform::RecordEvent record_event("MLUMemcpyH2DAsync:CPU->MLU");
platform::RecordEvent record_event("MLUMemcpyH2DAsync:CPU->MLU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyH2DAsync(dst, src, num,
reinterpret_cast<mluStream>(stream));
} else {
......@@ -962,7 +1007,8 @@ void Copy<platform::MLUPlace, platform::CPUPlace>(platform::MLUPlace dst_place,
VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place;
platform::RecordEvent record_event("MLUMemcpyH2DSync:CPU->MLU");
platform::RecordEvent record_event(
"MLUMemcpyH2DSync:CPU->MLU", platform::TracerEventType::UserDefined, 1);
platform::MLUMemcpyH2DSync(dst, src, num);
}
}
......@@ -980,8 +1026,9 @@ void Copy<platform::MLUPlace, platform::MLUPlace>(platform::MLUPlace dst_place,
if (stream) {
VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place << " by mlu stream(" << stream << ")";
platform::RecordEvent record_event(
"MLUMemcpyD2DAsync(same_mlu):MLU->MLU");
platform::RecordEvent record_event("MLUMemcpyD2DAsync(same_mlu):MLU->MLU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyD2DAsync(dst, src, num,
reinterpret_cast<mluStream>(stream));
} else {
......@@ -991,20 +1038,26 @@ void Copy<platform::MLUPlace, platform::MLUPlace>(platform::MLUPlace dst_place,
VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place;
platform::RecordEvent record_event("MLUMemcpyD2DSync(same_mlu):MLU->MLU");
platform::RecordEvent record_event("MLUMemcpyD2DSync(same_mlu):MLU->MLU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyD2DSync(dst, src, num);
}
} else {
if (stream) {
VLOG(4) << "Async memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place << " by mlu stream(" << stream << ")";
platform::RecordEvent record_event("MLUMemcpyPeerAsync:MLU->MLU");
platform::RecordEvent record_event("MLUMemcpyPeerAsync:MLU->MLU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyPeerAsync(dst, dst_place.device, src, src_place.device,
num, reinterpret_cast<mluStream>(stream));
} else {
VLOG(4) << "Sync memory::Copy " << num << " Bytes from " << src_place
<< " to " << dst_place;
platform::RecordEvent record_event("MLUMemcpyPeerSync:MLU->MLU");
platform::RecordEvent record_event("MLUMemcpyPeerSync:MLU->MLU",
platform::TracerEventType::UserDefined,
1);
platform::MLUMemcpyPeerSync(dst, dst_place.device, src, src_place.device,
num);
}
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......
......@@ -28,7 +28,7 @@ limitations under the License. */
#include "paddle/fluid/operators/math/padding.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
DECLARE_bool(cudnn_deterministic);
DECLARE_uint64(conv_workspace_size_limit);
......
......@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......
......@@ -21,7 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/operators/reader/buffered_reader.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......@@ -115,7 +116,9 @@ void BufferedReader::ReadAsync(size_t i) {
platform::CUDAPinnedPlace cuda_pinned_place;
std::vector<void *> cuda_pinned_ptrs;
cuda_pinned_ptrs.reserve(cpu.size());
platform::RecordEvent record_event("BufferedReader:MemoryCopy");
platform::RecordEvent record_event(
"BufferedReader:MemoryCopy", platform::TracerEventType::UserDefined,
1);
// NOTE(chenweihang): When we use CUDAPinned Memory, we need to call
// cudaHostAlloc, which is a CUDA API. Calling a CUDA API requires loading
// the CUDA lib into the device, which costs hundreds of MB of GPU memory.
......@@ -170,7 +173,9 @@ void BufferedReader::ReadAsync(size_t i) {
cudaStreamWaitEvent(stream_.get(), events_[i].get(), 0));
#endif
platform::RecordEvent record_event("BufferedReader:MemoryCopy");
platform::RecordEvent record_event(
"BufferedReader:MemoryCopy", platform::TracerEventType::UserDefined,
1);
for (size_t i = 0; i < cpu.size(); ++i) {
auto cpu_place = cpu[i].place();
auto cpu_ptr = cpu[i].data();
......@@ -229,7 +234,9 @@ void BufferedReader::ReadAsync(size_t i) {
platform::NPUEventRecord(events_[i].get(), compute_stream_);
platform::NPUStreamWaitEvent(stream_.get(), events_[i].get());
platform::RecordEvent record_event("BufferedReader:MemoryCopy");
platform::RecordEvent record_event("BufferedReader:MemoryCopy",
platform::TracerEventType::UserDefined,
1);
for (size_t i = 0; i < cpu.size(); ++i) {
auto cpu_place = cpu[i].place();
auto cpu_ptr = cpu[i].data();
......
......@@ -15,7 +15,7 @@
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......@@ -106,7 +106,8 @@ class ReadOp : public framework::OperatorBase {
std::vector<framework::LoDTensor> ins;
// For profiling
platform::RecordEvent record_event(Type());
platform::RecordEvent record_event(
Type().c_str(), platform::TracerEventType::UserDefined, 1);
reader->ReadNext(&ins);
if (ins.empty()) {
......
......@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace memory {
......@@ -322,7 +323,8 @@ NPUDeviceContext::~NPUDeviceContext() {
}
// Waits on this NPU context's stream by delegating to stream_->Wait().
// The call is logged at VLOG level 4 and wrapped in a profiler RecordEvent
// named "NPUDeviceContext/wait".
void NPUDeviceContext::Wait() const {
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped. The one-argument RecordEvent below looks like the removed line,
// and the three-argument form (event type UserDefined; the trailing 2 is
// presumably the trace level) looks like its replacement. Only one
// declaration of record_event can exist in the real file -- confirm against
// the repository before reusing this code.
platform::RecordEvent record_event("NPUDeviceContext/wait");
platform::RecordEvent record_event("NPUDeviceContext/wait",
platform::TracerEventType::UserDefined, 2);
VLOG(4) << "NPU context(" << this << ") Wait";
stream_->Wait();
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册