未验证 提交 019a552b 编写于 作者: C chenjian 提交者: GitHub

fix RecordEvent interface (#39675)

* fix RecordEvent interface

* modify default level to 4

* update interface use

* add const default trace level

* update operator.cc
上级 4e5d6743
......@@ -183,7 +183,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
} else {
......
......@@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/kernel_factory.h"
......@@ -261,10 +262,12 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
// TODO(wangchaochaohu) : refine code to use only one RecordEvent)
// in order to record different op type cost time
// and different op name cost time,we set two event.
platform::RecordEvent op_type_record_event(Type());
platform::RecordEvent op_type_record_event(
Type().c_str(), platform::TracerEventType::Operator, 1);
auto op_name = platform::OpName(outputs_, Type());
platform::RecordEvent op_name_record_event(
op_name, platform::EventRole::kUniqueOp);
op_name, platform::TracerEventType::Operator, 1,
platform::EventRole::kUniqueOp);
RunImpl(scope, place);
}
......@@ -1253,7 +1256,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
Scope* transfer_scope = nullptr;
{
platform::RecordEvent record_event("prepare_data",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
if (need_prepare_data_) {
transfer_scope = PrepareData(scope, *kernel_type_,
&transfered_inplace_vars, runtime_ctx);
......@@ -1265,7 +1269,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
if (!all_kernels_must_compute_runtime_shape_) {
platform::RecordEvent record_event("infer_shape",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
this->Info().infer_shape_(&infer_shape_ctx);
}
......@@ -1278,7 +1283,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// not Scope. Imperative mode only pass inputs and get outputs.
{
platform::RecordEvent record_event("compute",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
if (run_pten_kernel_) {
pten::KernelContext pt_kernel_context;
// Do data transform before building KernelContext
......
......@@ -27,7 +27,7 @@
#endif
#include "paddle/fluid/framework/library_type.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
DECLARE_bool(check_nan_inf);
DECLARE_bool(benchmark);
......@@ -348,16 +348,18 @@ static void PreparedOpRunImpl(
framework::Scope scope;
{
platform::RecordEvent record_event(op.Type() + " infer_shape",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::infer_shape",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
DygraphInferShapeContext<VarType> infer_shape_ctx(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx);
}
{
platform::RecordEvent record_event(op.Type() + " compute",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::compute",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
attrs, default_attrs));
......@@ -403,16 +405,18 @@ static void PreparedOpRunPtImpl(
const framework::AttributeMap& attrs,
const framework::AttributeMap& default_attrs) {
{
platform::RecordEvent record_event(op.Type() + " infer_shape",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::infer_shape",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
DygraphInferShapeContext<VarType> infer_shape_ctx(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx);
}
{
platform::RecordEvent record_event(op.Type() + " compute",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::compute",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);
......
......@@ -57,8 +57,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
......@@ -73,8 +74,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dy, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p,
*reorder_dst_memory_p);
astream.wait();
......
......@@ -57,8 +57,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
......@@ -78,8 +79,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
reorder_attr.set_output_scales(0, scales);
auto reorder_p = std::make_shared<dnnl::reorder>(
*(reorder_src_memory_p), *(reorder_dst_memory_p), reorder_attr);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p,
*reorder_dst_memory_p);
astream.wait();
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......@@ -63,8 +63,9 @@ class MarkerOpCPUKernel : public framework::OpKernel<T> {
auto marker_pos = ctx.Attr<std::string>("marker_pos");
platform::RecordEvent record_event(
"MarkerCPU", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
"MarkerCPU", "marker_" + marker_role + "_" + marker_pos,
platform::TracerEventType::OperatorInner, 1,
platform::EventRole::kInnerOp);
}
};
} // namespace operators
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
namespace operators {
......@@ -45,8 +45,9 @@ class MarkerOpCUDAKernel : public framework::OpKernel<T> {
auto* in_temp = A.mutable_data<T>({32, 1}, ctx.GetPlace());
auto* out_temp = B.mutable_data<T>({32, 1}, ctx.GetPlace());
platform::RecordEvent record_event(
"MarkerCUDA", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
"MarkerCUDA", "marker_" + marker_role + "_" + marker_pos,
platform::TracerEventType::OperatorInner, 1,
platform::EventRole::kInnerOp);
SimpleMarkerKernel<T><<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp,
32);
}
......
......@@ -976,8 +976,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
......
......@@ -264,8 +264,9 @@ class ConvTransposeMKLDNNHandlerT
dev_ctx.SetBlob(key_reorder_p, reorder_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -286,8 +287,9 @@ class ConvTransposeMKLDNNHandlerT
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......
......@@ -284,8 +284,9 @@ class FCPrimitiveFactory {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
}
......@@ -312,8 +313,9 @@ class FCPrimitiveFactory {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
astream.wait();
......
......@@ -116,8 +116,9 @@ class MulPrimitiveFactory {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
......@@ -277,8 +278,9 @@ class MulPrimitiveFactory {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
......
......@@ -23,7 +23,7 @@ limitations under the License. */
#include "dnnl.hpp"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
......@@ -190,7 +190,8 @@ inline void Reorder(dnnl::memory src, dnnl::memory dst,
auto reorder_prim = dnnl::reorder(src, dst);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst);
astream.wait();
}
......
......@@ -197,7 +197,8 @@ class MKLDNNHandlerNoCachingT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -221,8 +222,9 @@ class MKLDNNHandlerNoCachingT {
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -514,7 +516,8 @@ class MKLDNNHandlerT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -558,8 +561,9 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......@@ -580,8 +584,9 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
......
......@@ -66,8 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const {
#endif
}
RecordEvent::RecordEvent(const char *name, const EventRole role,
uint32_t level) {
RecordEvent::RecordEvent(const char *name, const TracerEventType type,
uint32_t level, const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) {
......@@ -86,11 +86,12 @@ RecordEvent::RecordEvent(const char *name, const EventRole role,
is_enabled_ = true;
shallow_copy_name_ = name;
role_ = role;
type_ = type;
start_ns_ = PosixInNsec();
}
RecordEvent::RecordEvent(const std::string &name, const EventRole role,
uint32_t level) {
RecordEvent::RecordEvent(const std::string &name, const TracerEventType type,
uint32_t level, const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) {
......@@ -109,11 +110,13 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
is_enabled_ = true;
name_ = new std::string(name);
role_ = role;
type_ = type;
start_ns_ = PosixInNsec();
}
RecordEvent::RecordEvent(const std::string &name, const EventRole role,
const std::string &attr, uint32_t level) {
RecordEvent::RecordEvent(const std::string &name, const std::string &attr,
const TracerEventType type, uint32_t level,
const EventRole role) {
#ifndef _WIN32
#ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) {
......@@ -130,6 +133,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
return;
}
is_enabled_ = true;
type_ = type;
name_ = new std::string(name);
start_ns_ = PosixInNsec();
attr_ = new std::string(attr);
......@@ -164,17 +168,15 @@ void RecordEvent::End() {
uint64_t end_ns = PosixInNsec();
if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) {
if (LIKELY(shallow_copy_name_ != nullptr)) {
HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_,
start_ns_, end_ns, role_,
TracerEventType::NumTypes);
HostEventRecorder::GetInstance().RecordEvent(
shallow_copy_name_, start_ns_, end_ns, role_, type_);
} else if (name_ != nullptr) {
if (attr_ == nullptr) {
HostEventRecorder::GetInstance().RecordEvent(
*name_, start_ns_, end_ns, role_, TracerEventType::NumTypes);
HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
role_, type_);
} else {
HostEventRecorder::GetInstance().RecordEvent(
*name_, start_ns_, end_ns, role_, TracerEventType::NumTypes,
*attr_);
HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
role_, type_, *attr_);
delete attr_;
}
delete name_;
......@@ -301,7 +303,7 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
void Mark(const std::string &name) {
if (FLAGS_enable_host_event_recorder_hook) {
HostEventRecorder::GetInstance().RecordEvent(
name, 0, 0, EventRole::kOrdinary, TracerEventType::NumTypes);
name, 0, 0, EventRole::kOrdinary, TracerEventType::UserDefined);
return;
}
GetEventList().Record(EventType::kMark, name, g_thread_id);
......
......@@ -21,12 +21,13 @@ limitations under the License. */
namespace paddle {
namespace platform {
static constexpr uint32_t kDefaultTraceLevel = 4;
// CPU event tracing. A trace marks something that happens but has no duration
// associated with it. For example, thread starts working.
// Chrome Trace Viewer Format: Instant Event
struct RecordInstantEvent {
explicit RecordInstantEvent(const char* name, TracerEventType type,
uint32_t level = 1);
uint32_t level = kDefaultTraceLevel);
};
// CPU event tracing. A trace starts when an object of this class is created and
......@@ -34,16 +35,21 @@ struct RecordInstantEvent {
// Chrome Trace Viewer Format: Duration Event/Complete Event
class RecordEvent {
public:
explicit RecordEvent(const std::string& name,
const EventRole role = EventRole::kOrdinary,
uint32_t level = 1);
explicit RecordEvent(
const std::string& name,
const TracerEventType type = TracerEventType::UserDefined,
uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
explicit RecordEvent(const char* name,
const EventRole role = EventRole::kOrdinary,
uint32_t level = 1);
explicit RecordEvent(const char* name, const TracerEventType type =
TracerEventType::UserDefined,
uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
RecordEvent(const std::string& name, const EventRole role,
const std::string& attr, uint32_t level = 1);
RecordEvent(const std::string& name, const std::string& attr,
const TracerEventType type = TracerEventType::UserDefined,
uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
// Stop event tracing explicitly before the object goes out of scope.
// Sometimes it's inconvenient to use RAII
......@@ -65,6 +71,7 @@ class RecordEvent {
// different kernel invocations within an op.
// std::string full_name_;
EventRole role_{EventRole::kOrdinary};
TracerEventType type_{TracerEventType::UserDefined};
std::string* attr_{nullptr};
bool finished_{false};
};
......
......@@ -36,6 +36,18 @@ enum class TracerEventType {
Memset = 6,
// Used to mark record defined by user
UserDefined = 7,
// Used to mark operator detail, (such as infer shape, compute)
OperatorInner = 8,
// Used to mark model training or testing perspective, forward process
Forward = 9,
// Used to mark model training perspective, backward process
Backward = 10,
// Used to mark model training perspective, optimization process
Optimization = 11,
// Used to mark distributed training perspective
Communication = 12,
// Used to mark python api
PythonOp = 13,
// A flag to denote the number of current types
NumTypes
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册