diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc index ea4564058d602a9abe43bd063f1ed73f88a2de08..86f18b7a800c76e71c2b5bef262640f0a80ee224 100644 --- a/paddle/fluid/platform/device_tracer.cc +++ b/paddle/fluid/platform/device_tracer.cc @@ -143,7 +143,7 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: { auto *kernel = reinterpret_cast(record); - tracer->AddKernelRecords(kernel->start, kernel->end, + tracer->AddKernelRecords(kernel->name, kernel->start, kernel->end, kernel->deviceId, kernel->streamId, kernel->correlationId); break; @@ -224,8 +224,9 @@ class DeviceTracerImpl : public DeviceTracer { stream_id, correlation_id, bytes}); } - void AddKernelRecords(uint64_t start, uint64_t end, int64_t device_id, - int64_t stream_id, uint32_t correlation_id) { + void AddKernelRecords(std::string name, uint64_t start, uint64_t end, + int64_t device_id, int64_t stream_id, + uint32_t correlation_id) { // 0 means timestamp information could not be collected for the kernel. if (start == 0 || end == 0) { VLOG(30) << correlation_id << " cannot be traced"; @@ -233,7 +234,7 @@ class DeviceTracerImpl : public DeviceTracer { } std::lock_guard l(trace_mu_); kernel_records_.push_back( - KernelRecord{start, end, device_id, stream_id, correlation_id}); + KernelRecord{name, start, end, device_id, stream_id, correlation_id}); } bool IsEnabled() { @@ -276,13 +277,13 @@ class DeviceTracerImpl : public DeviceTracer { profile_pb.set_start_ns(start_ns_); profile_pb.set_end_ns(end_ns_); for (const KernelRecord &r : kernel_records_) { - if (correlations_.find(r.correlation_id) == correlations_.end()) { - fprintf(stderr, "cannot relate a kernel activity\n"); - continue; - } auto *event = profile_pb.add_events(); event->set_type(proto::Event::GPUKernel); - event->set_name(correlations_.at(r.correlation_id)); + if (correlations_.find(r.correlation_id) != correlations_.end()) { + event->set_name(correlations_.at(r.correlation_id)); + } else { + event->set_name(r.name); + } event->set_start_ns(r.start_ns); event->set_end_ns(r.end_ns); event->set_sub_device_id(r.stream_id); diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index eaf047d4744762f69d50bff8d467da8e3b8317cc..bf0786be2d0fafbf4b610d16ef587ac219399203 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -39,6 +39,7 @@ inline uint64_t PosixInNsec() { class DeviceTracer { public: struct KernelRecord { + std::string name; uint64_t start_ns; uint64_t end_ns; int64_t device_id; @@ -84,8 +85,9 @@ class DeviceTracer { // Add a cuda kernel stats. `correlation_id` will be mapped to annotation // added before for human readability. - virtual void AddKernelRecords(uint64_t start, uint64_t end, int64_t device_id, - int64_t stream_id, uint32_t correlation_id) = 0; + virtual void AddKernelRecords(std::string name, uint64_t start, uint64_t end, + int64_t device_id, int64_t stream_id, + uint32_t correlation_id) = 0; // Generate a proto after done (Disabled). virtual proto::Profile GenProfile(const std::string& profile_path) = 0;