Unverified commit 019a552b, authored by chenjian, committed by GitHub

fix RecordEvent interface (#39675)

* fix RecordEvent interface

* modify default level to 4

* update interface use

* add const default trace level

* update operator.cc
Parent 4e5d6743
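The diff below reorders the RecordEvent constructor arguments so that the tracer event type and trace level come before the event role, and raises the default trace level from 1 to a named constant (kDefaultTraceLevel = 4). A minimal sketch of how a call site moves from the old interface to the new one, mirroring the call sites updated in this commit; the surrounding function and the name "some_user_event" are illustrative only and not part of the diff:

#include "paddle/fluid/platform/profiler/event_tracing.h"

namespace plat = paddle::platform;

// Hypothetical call site; only the RecordEvent argument order is taken from this diff.
void ReorderWithTracing() {
  // Old interface (before this commit): name, role, level.
  //   plat::RecordEvent record_reorder("int_reorder", plat::EventRole::kUniqueOp);

  // New interface: name, tracer event type, trace level, then role.
  plat::RecordEvent record_reorder("int_reorder",
                                   plat::TracerEventType::UserDefined, 2,
                                   plat::EventRole::kUniqueOp);

  // Type, level and role are all defaulted, so a bare name still compiles;
  // such an event is now recorded at kDefaultTraceLevel (4) instead of 1.
  plat::RecordEvent simple_event("some_user_event");
}  // Both events end here via RAII; End() can also be called explicitly.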
@@ -183,7 +183,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
     platform::RecordEvent record_reorder("ext_reorder",
-                                         platform::EventRole::kUniqueOp);
+                                         platform::TracerEventType::UserDefined,
+                                         2, platform::EventRole::kUniqueOp);
     reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
     astream.wait();
   } else {
...
@@ -32,6 +32,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/kernel_factory.h"
@@ -261,10 +262,12 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
       // TODO(wangchaochaohu) : refine code to use only one RecordEvent)
       // in order to record different op type cost time
       // and different op name cost time,we set two event.
-      platform::RecordEvent op_type_record_event(Type());
+      platform::RecordEvent op_type_record_event(
+          Type().c_str(), platform::TracerEventType::Operator, 1);
       auto op_name = platform::OpName(outputs_, Type());
       platform::RecordEvent op_name_record_event(
-          op_name, platform::EventRole::kUniqueOp);
+          op_name, platform::TracerEventType::Operator, 1,
+          platform::EventRole::kUniqueOp);
       RunImpl(scope, place);
     }
@@ -1253,7 +1256,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   Scope* transfer_scope = nullptr;
   {
     platform::RecordEvent record_event("prepare_data",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     if (need_prepare_data_) {
       transfer_scope = PrepareData(scope, *kernel_type_,
                                    &transfered_inplace_vars, runtime_ctx);
@@ -1265,7 +1269,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   if (!all_kernels_must_compute_runtime_shape_) {
     platform::RecordEvent record_event("infer_shape",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
     this->Info().infer_shape_(&infer_shape_ctx);
   }
@@ -1278,7 +1283,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   // not Scope. Imperative mode only pass inputs and get outputs.
   {
     platform::RecordEvent record_event("compute",
-                                       platform::EventRole::kInnerOp);
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     if (run_pten_kernel_) {
       pten::KernelContext pt_kernel_context;
       // Do data transform before building KernelContext
...
@@ -27,7 +27,7 @@
 #endif
 #include "paddle/fluid/framework/library_type.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
@@ -348,16 +348,18 @@ static void PreparedOpRunImpl(
   framework::Scope scope;
   {
-    platform::RecordEvent record_event(op.Type() + " infer_shape",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::infer_shape",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     DygraphInferShapeContext<VarType> infer_shape_ctx(
         &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
     op.Info().infer_shape_(&infer_shape_ctx);
   }
   {
-    platform::RecordEvent record_event(op.Type() + " compute",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::compute",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
                                           attrs, default_attrs));
@@ -403,16 +405,18 @@ static void PreparedOpRunPtImpl(
     const framework::AttributeMap& attrs,
     const framework::AttributeMap& default_attrs) {
   {
-    platform::RecordEvent record_event(op.Type() + " infer_shape",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::infer_shape",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     DygraphInferShapeContext<VarType> infer_shape_ctx(
         &ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
     op.Info().infer_shape_(&infer_shape_ctx);
   }
   {
-    platform::RecordEvent record_event(op.Type() + " compute",
-                                       platform::EventRole::kInnerOp);
+    platform::RecordEvent record_event(op.Type() + "::compute",
+                                       platform::TracerEventType::OperatorInner,
+                                       1, platform::EventRole::kInnerOp);
     PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);
...
@@ -57,8 +57,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
       handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
       auto reorder_p =
           handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
       astream.wait();
@@ -73,8 +74,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
       handler.AcquireDstMemory(dy, dout->format(), ctx.GetPlace());
       auto reorder_p =
           handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-      platform::RecordEvent record_reorder("int_reorder",
-                                           platform::EventRole::kUniqueOp);
+      platform::RecordEvent record_reorder(
+          "int_reorder", platform::TracerEventType::UserDefined, 2,
+          platform::EventRole::kUniqueOp);
       reorder_p->execute(astream, *reorder_src_memory_p,
                          *reorder_dst_memory_p);
       astream.wait();
...
@@ -57,8 +57,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
      handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
      auto reorder_p =
          handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
-     platform::RecordEvent record_reorder("int_reorder",
-                                          platform::EventRole::kUniqueOp);
+     platform::RecordEvent record_reorder(
+         "int_reorder", platform::TracerEventType::UserDefined, 2,
+         platform::EventRole::kUniqueOp);
      reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
      astream.wait();
@@ -78,8 +79,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
      reorder_attr.set_output_scales(0, scales);
      auto reorder_p = std::make_shared<dnnl::reorder>(
          *(reorder_src_memory_p), *(reorder_dst_memory_p), reorder_attr);
-     platform::RecordEvent record_reorder("int_reorder",
-                                          platform::EventRole::kUniqueOp);
+     platform::RecordEvent record_reorder(
+         "int_reorder", platform::TracerEventType::UserDefined, 2,
+         platform::EventRole::kUniqueOp);
      reorder_p->execute(astream, *reorder_src_memory_p,
                         *reorder_dst_memory_p);
      astream.wait();
...
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -63,8 +63,9 @@ class MarkerOpCPUKernel : public framework::OpKernel<T> { ...@@ -63,8 +63,9 @@ class MarkerOpCPUKernel : public framework::OpKernel<T> {
auto marker_pos = ctx.Attr<std::string>("marker_pos"); auto marker_pos = ctx.Attr<std::string>("marker_pos");
platform::RecordEvent record_event( platform::RecordEvent record_event(
"MarkerCPU", platform::EventRole::kInnerOp, "MarkerCPU", "marker_" + marker_role + "_" + marker_pos,
"marker_" + marker_role + "_" + marker_pos); platform::TracerEventType::OperatorInner, 1,
platform::EventRole::kInnerOp);
} }
}; };
} // namespace operators } // namespace operators
......
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/profiler.h"
+#include "paddle/fluid/platform/profiler/event_tracing.h"
 namespace paddle {
 namespace operators {
@@ -45,8 +45,9 @@ class MarkerOpCUDAKernel : public framework::OpKernel<T> {
     auto* in_temp = A.mutable_data<T>({32, 1}, ctx.GetPlace());
     auto* out_temp = B.mutable_data<T>({32, 1}, ctx.GetPlace());
     platform::RecordEvent record_event(
-        "MarkerCUDA", platform::EventRole::kInnerOp,
-        "marker_" + marker_role + "_" + marker_pos);
+        "MarkerCUDA", "marker_" + marker_role + "_" + marker_pos,
+        platform::TracerEventType::OperatorInner, 1,
+        platform::EventRole::kInnerOp);
     SimpleMarkerKernel<T><<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp,
                                                           32);
   }
...
...@@ -976,8 +976,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> { ...@@ -976,8 +976,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p); handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p, reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p); *reorder_dst_memory_p);
astream.wait(); astream.wait();
......
...@@ -264,8 +264,9 @@ class ConvTransposeMKLDNNHandlerT ...@@ -264,8 +264,9 @@ class ConvTransposeMKLDNNHandlerT
dev_ctx.SetBlob(key_reorder_p, reorder_p); dev_ctx.SetBlob(key_reorder_p, reorder_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -286,8 +287,9 @@ class ConvTransposeMKLDNNHandlerT ...@@ -286,8 +287,9 @@ class ConvTransposeMKLDNNHandlerT
auto reorder_p = std::static_pointer_cast<dnnl::reorder>( auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx.GetBlob(key_reorder_p)); dev_ctx.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
......
...@@ -284,8 +284,9 @@ class FCPrimitiveFactory { ...@@ -284,8 +284,9 @@ class FCPrimitiveFactory {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem); reorder.execute(astream, src_mem, *dst_mem);
astream.wait(); astream.wait();
} }
...@@ -312,8 +313,9 @@ class FCPrimitiveFactory { ...@@ -312,8 +313,9 @@ class FCPrimitiveFactory {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, reorder.execute(astream,
{{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}}); {{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
astream.wait(); astream.wait();
......
...@@ -116,8 +116,9 @@ class MulPrimitiveFactory { ...@@ -116,8 +116,9 @@ class MulPrimitiveFactory {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem); reorder.execute(astream, src_mem, dst_mem);
astream.wait(); astream.wait();
} }
...@@ -277,8 +278,9 @@ class MulPrimitiveFactory { ...@@ -277,8 +278,9 @@ class MulPrimitiveFactory {
auto &astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{ {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem); reorder.execute(astream, src_mem, dst_mem);
astream.wait(); astream.wait();
} }
......
...@@ -23,7 +23,7 @@ limitations under the License. */ ...@@ -23,7 +23,7 @@ limitations under the License. */
#include "dnnl.hpp" #include "dnnl.hpp"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle { namespace paddle {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = dnnl::memory::format_tag; using MKLDNNMemoryFormat = dnnl::memory::format_tag;
...@@ -190,7 +190,8 @@ inline void Reorder(dnnl::memory src, dnnl::memory dst, ...@@ -190,7 +190,8 @@ inline void Reorder(dnnl::memory src, dnnl::memory dst,
auto reorder_prim = dnnl::reorder(src, dst); auto reorder_prim = dnnl::reorder(src, dst);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst); reorder_prim.execute(astream, src, dst);
astream.wait(); astream.wait();
} }
......
...@@ -197,7 +197,8 @@ class MKLDNNHandlerNoCachingT { ...@@ -197,7 +197,8 @@ class MKLDNNHandlerNoCachingT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -221,8 +222,9 @@ class MKLDNNHandlerNoCachingT { ...@@ -221,8 +222,9 @@ class MKLDNNHandlerNoCachingT {
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p); std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -514,7 +516,8 @@ class MKLDNNHandlerT { ...@@ -514,7 +516,8 @@ class MKLDNNHandlerT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp); platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -558,8 +561,9 @@ class MKLDNNHandlerT { ...@@ -558,8 +561,9 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
...@@ -580,8 +584,9 @@ class MKLDNNHandlerT { ...@@ -580,8 +584,9 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<dnnl::reorder>( auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx_.GetBlob(key_reorder_p)); dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder", platform::RecordEvent record_reorder(
platform::EventRole::kUniqueOp); "int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}}); {DNNL_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
......
...@@ -66,8 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const { ...@@ -66,8 +66,8 @@ double Event::CudaElapsedMs(const Event &e) const {
#endif #endif
} }
RecordEvent::RecordEvent(const char *name, const EventRole role, RecordEvent::RecordEvent(const char *name, const TracerEventType type,
uint32_t level) { uint32_t level, const EventRole role) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -86,11 +86,12 @@ RecordEvent::RecordEvent(const char *name, const EventRole role, ...@@ -86,11 +86,12 @@ RecordEvent::RecordEvent(const char *name, const EventRole role,
is_enabled_ = true; is_enabled_ = true;
shallow_copy_name_ = name; shallow_copy_name_ = name;
role_ = role; role_ = role;
type_ = type;
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
} }
RecordEvent::RecordEvent(const std::string &name, const EventRole role, RecordEvent::RecordEvent(const std::string &name, const TracerEventType type,
uint32_t level) { uint32_t level, const EventRole role) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -109,11 +110,13 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role, ...@@ -109,11 +110,13 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
is_enabled_ = true; is_enabled_ = true;
name_ = new std::string(name); name_ = new std::string(name);
role_ = role; role_ = role;
type_ = type;
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
} }
RecordEvent::RecordEvent(const std::string &name, const EventRole role, RecordEvent::RecordEvent(const std::string &name, const std::string &attr,
const std::string &attr, uint32_t level) { const TracerEventType type, uint32_t level,
const EventRole role) {
#ifndef _WIN32 #ifndef _WIN32
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_enable_nvprof_hook) { if (g_enable_nvprof_hook) {
...@@ -130,6 +133,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role, ...@@ -130,6 +133,7 @@ RecordEvent::RecordEvent(const std::string &name, const EventRole role,
return; return;
} }
is_enabled_ = true; is_enabled_ = true;
type_ = type;
name_ = new std::string(name); name_ = new std::string(name);
start_ns_ = PosixInNsec(); start_ns_ = PosixInNsec();
attr_ = new std::string(attr); attr_ = new std::string(attr);
...@@ -164,17 +168,15 @@ void RecordEvent::End() { ...@@ -164,17 +168,15 @@ void RecordEvent::End() {
uint64_t end_ns = PosixInNsec(); uint64_t end_ns = PosixInNsec();
if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) { if (LIKELY(FLAGS_enable_host_event_recorder_hook && is_enabled_)) {
if (LIKELY(shallow_copy_name_ != nullptr)) { if (LIKELY(shallow_copy_name_ != nullptr)) {
HostEventRecorder::GetInstance().RecordEvent(shallow_copy_name_, HostEventRecorder::GetInstance().RecordEvent(
start_ns_, end_ns, role_, shallow_copy_name_, start_ns_, end_ns, role_, type_);
TracerEventType::NumTypes);
} else if (name_ != nullptr) { } else if (name_ != nullptr) {
if (attr_ == nullptr) { if (attr_ == nullptr) {
HostEventRecorder::GetInstance().RecordEvent( HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
*name_, start_ns_, end_ns, role_, TracerEventType::NumTypes); role_, type_);
} else { } else {
HostEventRecorder::GetInstance().RecordEvent( HostEventRecorder::GetInstance().RecordEvent(*name_, start_ns_, end_ns,
*name_, start_ns_, end_ns, role_, TracerEventType::NumTypes, role_, type_, *attr_);
*attr_);
delete attr_; delete attr_;
} }
delete name_; delete name_;
...@@ -301,7 +303,7 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes, ...@@ -301,7 +303,7 @@ void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
void Mark(const std::string &name) { void Mark(const std::string &name) {
if (FLAGS_enable_host_event_recorder_hook) { if (FLAGS_enable_host_event_recorder_hook) {
HostEventRecorder::GetInstance().RecordEvent( HostEventRecorder::GetInstance().RecordEvent(
name, 0, 0, EventRole::kOrdinary, TracerEventType::NumTypes); name, 0, 0, EventRole::kOrdinary, TracerEventType::UserDefined);
return; return;
} }
GetEventList().Record(EventType::kMark, name, g_thread_id); GetEventList().Record(EventType::kMark, name, g_thread_id);
......
...@@ -21,12 +21,13 @@ limitations under the License. */ ...@@ -21,12 +21,13 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
static constexpr uint32_t kDefaultTraceLevel = 4;
// CPU event tracing. A trace marks something that happens but has no duration // CPU event tracing. A trace marks something that happens but has no duration
// associated with it. For example, thread starts working. // associated with it. For example, thread starts working.
// Chrome Trace Viewer Format: Instant Event // Chrome Trace Viewer Format: Instant Event
struct RecordInstantEvent { struct RecordInstantEvent {
explicit RecordInstantEvent(const char* name, TracerEventType type, explicit RecordInstantEvent(const char* name, TracerEventType type,
uint32_t level = 1); uint32_t level = kDefaultTraceLevel);
}; };
// CPU event tracing. A trace starts when an object of this clas is created and // CPU event tracing. A trace starts when an object of this clas is created and
...@@ -34,16 +35,21 @@ struct RecordInstantEvent { ...@@ -34,16 +35,21 @@ struct RecordInstantEvent {
// Chrome Trace Viewer Format: Duration Event/Complte Event // Chrome Trace Viewer Format: Duration Event/Complte Event
class RecordEvent { class RecordEvent {
public: public:
explicit RecordEvent(const std::string& name, explicit RecordEvent(
const EventRole role = EventRole::kOrdinary, const std::string& name,
uint32_t level = 1); const TracerEventType type = TracerEventType::UserDefined,
uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
explicit RecordEvent(const char* name, explicit RecordEvent(const char* name, const TracerEventType type =
const EventRole role = EventRole::kOrdinary, TracerEventType::UserDefined,
uint32_t level = 1); uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
RecordEvent(const std::string& name, const EventRole role, RecordEvent(const std::string& name, const std::string& attr,
const std::string& attr, uint32_t level = 1); const TracerEventType type = TracerEventType::UserDefined,
uint32_t level = kDefaultTraceLevel,
const EventRole role = EventRole::kOrdinary);
// Stop event tracing explicitly before the object goes out of scope. // Stop event tracing explicitly before the object goes out of scope.
// Sometimes it's inconvenient to use RAII // Sometimes it's inconvenient to use RAII
...@@ -65,6 +71,7 @@ class RecordEvent { ...@@ -65,6 +71,7 @@ class RecordEvent {
// different kernel invocations within an op. // different kernel invocations within an op.
// std::string full_name_; // std::string full_name_;
EventRole role_{EventRole::kOrdinary}; EventRole role_{EventRole::kOrdinary};
TracerEventType type_{TracerEventType::UserDefined};
std::string* attr_{nullptr}; std::string* attr_{nullptr};
bool finished_{false}; bool finished_{false};
}; };
......
...@@ -36,6 +36,18 @@ enum class TracerEventType { ...@@ -36,6 +36,18 @@ enum class TracerEventType {
Memset = 6, Memset = 6,
// Used to mark record defined by user // Used to mark record defined by user
UserDefined = 7, UserDefined = 7,
// Used to mark operator detail, (such as infer shape, compute)
OperatorInner = 8,
// Used to mark model training or testing perspective, forward process
Forward = 9,
// Used to mark model training perspective, backward process
Backward = 10,
// Used to mark model training perspective, optimization process
Optimization = 11,
// Used to mark distributed training perspective
Communication = 12,
// Used to mark python api
PythonOp = 13,
// A flag to denote the number of current types // A flag to denote the number of current types
NumTypes NumTypes
}; };
......
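With the enum extended as above, operator-level call sites in this diff tag whole-operator spans as TracerEventType::Operator and per-stage spans (infer shape, compute) as TracerEventType::OperatorInner, both at trace level 1. A condensed sketch of that pattern, assuming it is compiled inside the PaddlePaddle tree; the helper function and its name are illustrative, not part of the diff:

#include <string>

#include "paddle/fluid/platform/profiler/event_tracing.h"

namespace plat = paddle::platform;

// Illustrative helper mirroring the pattern used in OperatorBase::Run and
// OperatorWithKernel::RunImpl in this commit.
void RunOpWithTracing(const std::string& op_type) {
  // Whole-operator span: type Operator, trace level 1.
  plat::RecordEvent op_event(op_type, plat::TracerEventType::Operator, 1);
  {
    // Inner stage span: shape inference.
    plat::RecordEvent infer_shape_event("infer_shape",
                                        plat::TracerEventType::OperatorInner, 1,
                                        plat::EventRole::kInnerOp);
    // ... run shape inference ...
  }
  {
    // Inner stage span: kernel computation.
    plat::RecordEvent compute_event("compute",
                                    plat::TracerEventType::OperatorInner, 1,
                                    plat::EventRole::kInnerOp);
    // ... run the kernel ...
  }
}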