From 8fddd8089d0b36479454347fab0c4cca254006e3 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 1 Jul 2021 14:32:42 +0800 Subject: [PATCH] fix(profiler): respect record_device option GitOrigin-RevId: 7c9a8cfba773061218fa3cda98fcc305333e7945 --- .../src/impl/interpreter/interpreter_impl.cpp | 10 +++---- .../src/impl/profiler/chrome_timeline.cpp | 2 ++ imperative/src/impl/profiler_plugin.cpp | 28 +++++++++---------- .../include/megbrain/imperative/profiler.h | 5 ++++ 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/imperative/src/impl/interpreter/interpreter_impl.cpp b/imperative/src/impl/interpreter/interpreter_impl.cpp index 55b62b48..52e67854 100644 --- a/imperative/src/impl/interpreter/interpreter_impl.cpp +++ b/imperative/src/impl/interpreter/interpreter_impl.cpp @@ -724,14 +724,14 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd) { // Before execute for (auto&& [device, kernel_id]: kernels) { MGB_RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device); - MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device)); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(device)); } // Apply op // Here std::move is REQUIRED for removing duplicated references. auto outputs = apply_on_physical_tensor(apply_on_physical_tensor, *cmd.op, inputs); // After execute for (auto&& [device, kernel_id]: kernels) { - MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device)); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(device)); MGB_RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device); } // End profiling operator @@ -1009,9 +1009,9 @@ void ChannelImpl::process_one_task(Command& icmd) { using T = std::decay_t; if constexpr (std::is_same_v) { MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Put); - MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node())); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(cmd.value.comp_node())); auto value = cmd.no_cache ? std::make_shared(cmd.value) : Tensor::make(cmd.value); - MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node())); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(cmd.value.comp_node())); produce_tensor(cmd.dest, std::move(value)); MGB_RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::Put); sample_on_device(cmd.dest->desc.comp_node, false); @@ -1136,7 +1136,7 @@ void ChannelImpl::process_one_task(Command& icmd) { if (Profiler::get_option("sample_rate", 0)) { sample_on_device(device, true); } - MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device)); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(device)); }); MGB_RECORD_EVENT(StartProfileFinishEvent); } else if constexpr (std::is_same_v) { diff --git a/imperative/src/impl/profiler/chrome_timeline.cpp b/imperative/src/impl/profiler/chrome_timeline.cpp index 3ee8a192..f1954a96 100644 --- a/imperative/src/impl/profiler/chrome_timeline.cpp +++ b/imperative/src/impl/profiler/chrome_timeline.cpp @@ -302,6 +302,8 @@ struct ChromeTimelineEventVisitor: EventVisitor { } else if constexpr (std::is_same_v) { new_host_event("TensorGetProp", 'X') .dur(0).args(current_tensor->detail(current->time)); + } else if constexpr (std::is_same_v) { + new_host_event("TensorWaitProp", 'B'); } else if constexpr (std::is_same_v) { new_host_event(pid_str, 'f') .id(event.tensor_id) diff --git a/imperative/src/impl/profiler_plugin.cpp b/imperative/src/impl/profiler_plugin.cpp index 64b91948..de6a0ee5 100644 --- a/imperative/src/impl/profiler_plugin.cpp +++ b/imperative/src/impl/profiler_plugin.cpp @@ -26,7 +26,7 @@ ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph): PluginBase(graph) { // reset mgb_assert(!event.graph->options().imperative_proxy_graph); CompNode::foreach([](CompNode device){ - Profiler::record(Timer::record_device(device)); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(device)); }); if (m_opr_dict.empty() && m_var_dict.empty()) { init_seq(event.exec); @@ -47,22 +47,21 @@ ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph): PluginBase(graph) { Profiler::record("DispatchOprs"); event.exec->iter_opr_seq([this](OperatorNodeBase* opr) -> bool{ auto& opr_info = get_opr_info(opr); - SmallVector inputs; - for (auto input: opr->input()) { - inputs.push_back(get_var_info(input).id); - } - SmallVector outputs; for (auto output: opr->output()) { - outputs.push_back(get_var_info(output).id); + auto& var_id = get_var_info(output).id; + var_id = Profiler::next_id(); + Profiler::record(var_id, output->name()); } auto opr_name = opr->dyn_typeinfo()->name; auto copy_params = [params = opr_info.params] { return *params; }; + SmallVector inputs, outputs; + for (auto input: opr->input()) { + inputs.push_back(get_var_info(input).id); + } for (auto output: opr->output()) { - auto& var_id = get_var_info(output).id; - var_id = Profiler::next_id(); - Profiler::record(var_id, output->name()); + outputs.push_back(get_var_info(output).id); } - Profiler::record(opr_info.id, opr_name, copy_params, inputs, outputs); + Profiler::record(opr_info.id = Profiler::next_id(), opr_name, copy_params, inputs, outputs); return true; }); Profiler::record("DispatchOprs"); @@ -128,12 +127,12 @@ ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph): PluginBase(graph) { auto on_before_kern = [this](BeforeKernel const& event) { OperatorNodeBase* opr = event.opr; Profiler::record(get_opr_info(opr).id, get_opr_info(opr).id, event.comp_node); - Profiler::record(Timer::record_device(event.comp_node)); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(event.comp_node)); }; auto on_after_kern = [this](AfterKernel const& event) { OperatorNodeBase* opr = event.opr; - Profiler::record(Timer::record_device(event.comp_node)); - Profiler::record(get_opr_info(opr).id, get_opr_info(opr).id, event.comp_node); + MGB_RECORD_EVENT_IF((Profiler::get_option("profile_device", 0)), RecordDeviceEvent, Timer::record_device(event.comp_node)); + Profiler::record(get_opr_info(opr).id, get_opr_info(opr).id, event.comp_node); }; auto on_graph_compile = [this](const CompSeqOrderDetermined&) { m_opr_dict.clear(); @@ -182,7 +181,6 @@ void ProfilerPlugin::init_seq(cg::AsyncExecutable *comp_seq) { ProfilerPlugin::OprInfo& ProfilerPlugin::register_opr(cg::OperatorNodeBase *opr) { OprInfo info; - info.id = Profiler::next_id(); auto params = std::make_shared>(); auto params_json = opr->to_json(); for (auto&& [k, v]: params_json->cast_final().get_impl()) { diff --git a/imperative/src/include/megbrain/imperative/profiler.h b/imperative/src/include/megbrain/imperative/profiler.h index b33745d4..d897fe7c 100644 --- a/imperative/src/include/megbrain/imperative/profiler.h +++ b/imperative/src/include/megbrain/imperative/profiler.h @@ -233,5 +233,10 @@ public: mgb::imperative::Profiler::record(type{__VA_ARGS__}); \ } \ +#define MGB_RECORD_EVENT_IF(expr, type, ...) \ + if (mgb::imperative::Profiler::is_profiling() && (expr)) { \ + mgb::imperative::Profiler::record(type{__VA_ARGS__}); \ + } \ + } // namespace imperative } // namespace mgb -- GitLab