提交 9d47c3ba 编写于 作者: M Megvii Engine Team 提交者: huangxinda

feat(profiler): imperative profiler support tracing

GitOrigin-RevId: b247472feba6d28416f52874c3517e50a8c2bd49
上级 cdcb46ba
......@@ -17,7 +17,7 @@ from typing import Any
import numpy as np
from ..core._imperative_rt import GraphProfiler, SerializationMetadata
from ..core._imperative_rt import GraphProfiler, GraphProfiler2, SerializationMetadata
from ..core._imperative_rt.core2 import Tensor as RawTensor
from ..core._imperative_rt.core2 import (
TensorWeakRef,
......@@ -39,6 +39,7 @@ from ..core.ops.special import Const
from ..core.tensor import megbrain_graph as G
from ..core.tensor.utils import setscalar
from ..utils.naming import AutoNaming
from ..utils.profiler import is_profiling
from .dtr_config import DTRConfig
from .graph_opt_config import GraphOptimizationConfig
from .sublinear_memory_config import SublinearMemoryConfig
......@@ -160,6 +161,7 @@ class trace:
self._dtr_config = dtr_config
self._profiling = profiling
self._profiler = None
self._profiler2 = None
self._graph_opt_level = opt_level
self._graph_opt_config = graph_opt_config
self._symbolic_shape = symbolic_shape
......@@ -382,7 +384,8 @@ class trace:
lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
lazy_eval_graph._set_priority_to_id([*lazy_eval_links, *readers])
lazy_eval_graph.compile(*lazy_eval_links, *readers)
lazy_eval_graph()
self._execute_graph(lazy_eval_graph)
lazy_eval_graph.wait()
for r, x in zip(readers, lazy_eval_tensors):
# get values from lazy_eval_graph and assign to lazy_eval tensor
x._handle = RawTensor(r.op.get_value())._handle
......@@ -401,7 +404,7 @@ class trace:
else:
if self._graph is None:
self._compile()
self._graph.execute()
self._execute_graph(self._graph)
def do_finalize():
escaped_tensors = self._take_escaped_tensors()
......@@ -532,9 +535,17 @@ class trace:
# profile
if self._profiling:
self._profiler = GraphProfiler(graph)
self._profiler2 = None
if int(os.getenv("MEGENGINE_INPLACE_UPDATE", "0")):
graph.options.var_sanity_check_first_run = False
def _execute_graph(self, graph: G.Graph, *args):
    """Execute ``graph``, keeping the tracing profiler plugin in sync first.

    When ``is_profiling()`` reports that profiling is on and no
    ``GraphProfiler2`` is currently held, one is created for ``graph``.
    When profiling is off and one is still held, the reference is dropped.
    Afterwards ``graph.execute(*args)`` is called.

    NOTE(review): the plugin is created for whichever graph is passed while
    ``self._profiler2`` is ``None``; confirm that is intended when this
    method is used with more than one graph object.
    """
    if self._profiler2 is None:
        # Nothing attached yet: attach only if profiling is active.
        if is_profiling():
            self._profiler2 = GraphProfiler2(graph)
    elif not is_profiling():
        # A plugin is held but profiling has been switched off: release it.
        self._profiler2 = None
    graph.execute(*args)
def _compile(self):
graph = self._graph = G.Graph()
graph.options.async_exec_level = 0b100
......
......@@ -23,7 +23,7 @@
#include "./common.h"
#include "./ops.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/imperative/profiler_plugin.h"
namespace py = pybind11;
......@@ -239,6 +239,10 @@ void init_graph_rt(py::module m) {
}))
.def("get", [](_CompGraphProfilerImpl& profiler) { return profiler._get_result(); });
using interpreter::intl::ProfilerPlugin;
py::class_<ProfilerPlugin, std::shared_ptr<ProfilerPlugin>>(m, "GraphProfiler2")
.def(py::init<cg::ComputingGraph*>());
auto GraphOptimizeOptions = py::class_<_OptimizeForInferenceOptions>(m, "GraphOptimizeOptions")
.def(py::init())
.def("serialize", &_OptimizeForInferenceOptions::serialize)
......
/**
* \file imperative/src/impl/profiler_plugin.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megbrain/imperative/profiler_plugin.h"
#include "megbrain/graph.h"
#include "megbrain/graph/event.h"
#include "./profiler/events.h"
namespace mgb::imperative::interpreter::intl {
/*!
 * \brief Attach the profiler plugin to \p graph.
 *
 * The constructor builds seven callbacks and registers each of them as a
 * receiver on graph->event(). The callbacks translate computing-graph
 * execution events into Profiler::record<...>() calls, using the bookkeeping
 * stored in m_opr_dict / m_var_dict (ids, comp nodes, reference counts).
 *
 * \param graph computing graph whose events are observed; it is also passed
 *      to the PluginBase constructor.
 */
ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph): PluginBase(graph) {
using namespace cg;
using namespace cg::event;
using namespace profiler;
// Handler for CompSeqExecBeforeStart: announces every operator and variable
// of the sequence before execution begins.
auto on_seq_start = [this](CompSeqExecBeforeStart const& event) {
// This plugin must not be attached to an imperative proxy graph.
mgb_assert(!event.graph->options().imperative_proxy_graph);
// The tables are filled lazily: they are empty on the first run and after
// on_graph_compile (below) has cleared them.
if (m_opr_dict.empty() && m_var_dict.empty()) {
init_seq(event.exec);
}
// Scope "DispatchOprs": one OpDispatchEvent per operator, followed by a
// TensorDeclareEvent for each of its outputs.
Profiler::record<ScopeEvent>("DispatchOprs");
event.exec->iter_opr_seq([this](OperatorNodeBase* opr) -> bool{
auto& opr_info = get_opr_info(opr);
// Collect the profiler ids of all inputs and outputs of this operator.
SmallVector<uint64_t> inputs;
for (auto input: opr->input()) {
inputs.push_back(get_var_info(input).id);
}
SmallVector<uint64_t> outputs;
for (auto output: opr->output()) {
outputs.push_back(get_var_info(output).id);
}
auto opr_name = opr->dyn_typeinfo()->name;
// The callback holds its own shared_ptr to the parameter map and returns a
// copy of the map when invoked.
auto copy_params = [params = opr_info.params] { return *params; };
Profiler::record<OpDispatchEvent>(opr_info.id, opr_name, copy_params, inputs, outputs);
for (auto output: opr->output()) {
auto var_id = get_var_info(output).id;
Profiler::record<TensorDeclareEvent>(var_id);
}
return true;
});
Profiler::record<ScopeFinishEvent>("DispatchOprs");
// Scope "Constants": variables flagged is_const by init_seq are declared and
// reported as produced right away; every other variable gets its runtime
// reference counter reset to the static count for this run.
Profiler::record<ScopeEvent>("Constants");
for (auto&& [var, var_info]: m_var_dict) {
if (var_info->is_const) {
// Layout and pointer are only read when the device tensor is valid;
// otherwise an empty layout and nullptr are reported.
bool valid = var->dev_tensor_valid();
auto layout = valid ? var->layout() : TensorLayout();
Profiler::record<TensorDeclareEvent>(var_info->id);
Profiler::record<TensorProduceEvent>(var_info->id, layout, var->comp_node(), valid ? var->dev_tensor().raw_ptr() : nullptr);
} else {
var_info->rt_ref_cnt = var_info->ref_cnt;
}
}
Profiler::record<ScopeFinishEvent>("Constants");
};
// Handler for OprExecStart: reports the start of an operator and the use of
// each of its inputs. All recording is handed to
// event.env->dispatch_on_comp_node with the operator's comp node.
// NOTE(review): the runners capture opr_info / var_info by reference, so the
// m_opr_dict / m_var_dict entries must still exist when the runners execute -
// confirm against the dispatch semantics.
auto on_opr_start = [this](OprExecStart const& event) {
OperatorNodeBase* opr = event.opr;
auto& opr_info = get_opr_info(opr);
auto comp_node = opr_info.comp_node;
auto runner = [&opr_info] {
Profiler::record<OpExecuteEvent>(opr_info.id);
};
event.env->dispatch_on_comp_node(comp_node, runner);
auto inputs = opr->input();
for (auto&& input: inputs) {
auto& var_info = get_var_info(input);
// Per input: OpInputEvent, TensorUsageEvent, OpInputFinishEvent.
auto runner = [&var_info, input] {
auto inp_id = var_info.id;
Profiler::record<OpInputEvent>(inp_id, input->shape());
Profiler::record<TensorUsageEvent>(inp_id);
Profiler::record<OpInputFinishEvent>(inp_id, input->shape());
};
event.env->dispatch_on_comp_node(comp_node, runner);
}
};
// Handler for OprExecKernelEnd: releases inputs whose runtime reference count
// drops to zero, reports the outputs, then reports the end of the operator.
auto on_opr_finish = [this](OprExecKernelEnd const& event) {
OperatorNodeBase* opr = event.opr;
auto& opr_info = get_opr_info(opr);
auto comp_node = opr_info.comp_node;
auto inputs = opr->input();
auto outputs = opr->output();
for (auto input: inputs) {
auto& var_info = get_var_info(input);
auto runner = [&var_info] {
// Constant variables are not counted here; they are released in
// on_seq_finish instead.
if (!var_info.is_const) {
if (--var_info.rt_ref_cnt == 0) {
Profiler::record<TensorReleaseEvent>(var_info.id);
}
}
};
event.env->dispatch_on_comp_node(comp_node, runner);
}
for (auto output: outputs) {
auto& var_info = get_var_info(output);
// Every output is required to live on the operator's comp node.
mgb_assert(comp_node == output->comp_node(), "opr comp_node mismatch");
auto runner = [&var_info, output] {
auto out_id = var_info.id;
bool valid = output->dev_tensor_valid();
auto layout = valid ? output->layout() : TensorLayout();
Profiler::record<OpOutputEvent>(out_id, output->shape());
Profiler::record<TensorProduceEvent>(out_id, layout, output->comp_node(), valid ? output->dev_tensor().raw_ptr() : nullptr);
// An output with a static reference count of zero is released immediately
// after being produced.
if (!var_info.ref_cnt) {
Profiler::record<TensorReleaseEvent>(var_info.id);
}
Profiler::record<OpOutputFinishEvent>(out_id, output->shape());
};
event.env->dispatch_on_comp_node(comp_node, runner);
}
auto runner = [&opr_info]() {
Profiler::record<OpExecuteFinishEvent>(opr_info.id);
};
event.env->dispatch_on_comp_node(comp_node, runner);
};
// Handler for BeforeKernel: records a KernelExecuteEvent. The operator id is
// passed for both leading arguments, together with a timer event recorded on
// event.comp_node.
auto on_before_kern = [this](BeforeKernel const& event) {
OperatorNodeBase* opr = event.opr;
Profiler::record<KernelExecuteEvent>(get_opr_info(opr).id, get_opr_info(opr).id, Timer::record_event(event.comp_node));
};
// Handler for AfterKernel: records the matching KernelExecuteFinishEvent with
// the same argument layout as on_before_kern.
auto on_after_kern = [this](AfterKernel const& event) {
OperatorNodeBase* opr = event.opr;
Profiler::record<KernelExecuteFinishEvent>(get_opr_info(opr).id, get_opr_info(opr).id, Timer::record_event(event.comp_node));
};
// Handler for CompSeqOrderDetermined: drops all bookkeeping so that the next
// on_seq_start call rebuilds it through init_seq.
auto on_graph_compile = [this](const CompSeqOrderDetermined&) {
m_opr_dict.clear();
m_var_dict.clear();
};
// Handler for CompSeqExecFinished: releases the constant variables and
// records a TensorEraseEvent (carrying the static ref_cnt) for every variable.
auto on_seq_finish = [this](CompSeqExecFinished const& event) {
for (auto&& [var, var_info]: m_var_dict) {
MGB_MARK_USED_VAR(var);
if (var_info->is_const) {
Profiler::record<TensorReleaseEvent>(var_info->id);
}
Profiler::record<TensorEraseEvent>(var_info->id, var_info->ref_cnt);
}
};
// Register every callback on the graph's event connector and hand the
// returned handles to add_event_handler.
add_event_handler(graph->event().register_receiver<CompSeqExecBeforeStart>(on_seq_start));
add_event_handler(graph->event().register_receiver<OprExecStart>(on_opr_start));
add_event_handler(graph->event().register_receiver<OprExecKernelEnd>(on_opr_finish));
add_event_handler(graph->event().register_receiver<BeforeKernel>(on_before_kern));
add_event_handler(graph->event().register_receiver<AfterKernel>(on_after_kern));
add_event_handler(graph->event().register_receiver<CompSeqOrderDetermined>(on_graph_compile));
add_event_handler(graph->event().register_receiver<CompSeqExecFinished>(on_seq_finish));
}
/*!
 * \brief Build the operator and variable tables for a computing sequence.
 *
 * Walks \p comp_seq operator by operator. Each operator is registered; each
 * input that has not been seen before is registered and flagged as constant,
 * while an already known input gets its static reference count incremented.
 * Every output is registered and flagged as non-constant.
 *
 * Both tables must be empty on entry, and every operator must run on exactly
 * one comp node.
 *
 * \param comp_seq executable whose operator sequence is scanned
 */
void ProfilerPlugin::init_seq(cg::AsyncExecutable *comp_seq) {
    mgb_assert(m_opr_dict.empty());
    mgb_assert(m_var_dict.empty());
    auto visit_opr = [this](cg::OperatorNodeBase* opr) {
        auto comp_nodes = get_opr_comp_node_set(opr);
        mgb_assert(comp_nodes.size() == 1);
        register_opr(opr);
        for (auto&& input: opr->input()) {
            auto known = m_var_dict.find(input);
            if (known != m_var_dict.end()) {
                // Seen before: one more consumer of this variable.
                known->second->ref_cnt++;
            } else {
                // First sighting as an input: treated as a constant.
                register_var(input).is_const = true;
            }
        }
        for (auto&& output: opr->output()) {
            register_var(output).is_const = false;
        }
        //TODO: check ref_cnt
        return true;
    };
    comp_seq->iter_opr_seq(visit_opr);
}
/*!
 * \brief Create the bookkeeping record for \p opr and store it in m_opr_dict.
 *
 * The record receives a fresh profiler id, a string map built from the
 * top-level entries of the operator's JSON description, and the operator's
 * single comp node.
 *
 * \param opr operator to register; it must use exactly one comp node
 * \return reference to the entry stored in m_opr_dict for \p opr (the
 *      already existing entry if \p opr was registered before)
 */
ProfilerPlugin::OprInfo& ProfilerPlugin::register_opr(cg::OperatorNodeBase *opr) {
    OprInfo record;
    record.id = Profiler::next_id();

    // Flatten the operator's JSON object into key -> serialized value.
    auto param_map = std::make_shared<std::unordered_map<std::string, std::string>>();
    auto opr_json = opr->to_json();
    auto&& json_fields = opr_json->cast_final<json::Object>().get_impl();
    for (auto&& [key, value]: json_fields) {
        param_map->emplace(key.get_impl(), value->to_string());
    }
    record.params = std::move(param_map);

    auto comp_node_set = cg::get_opr_comp_node_set(opr);
    mgb_assert(comp_node_set.size() == 1, "only support single comp_node opr");
    record.comp_node = *comp_node_set.begin();

    auto inserted = m_opr_dict.insert({opr, record});
    return inserted.first->second;
}
/*!
 * \brief Create the bookkeeping record for \p var and store it in m_var_dict.
 *
 * The new record gets a fresh profiler id, is marked non-constant and starts
 * with both reference counters at zero.
 *
 * \param var variable node to register
 * \return reference to the record stored in m_var_dict for \p var (the
 *      already existing record if \p var was registered before)
 */
ProfilerPlugin::VarInfo& ProfilerPlugin::register_var(cg::VarNode *var) {
    auto fresh = std::make_unique<VarInfo>();
    fresh->id = Profiler::next_id();
    fresh->is_const = false;
    fresh->ref_cnt = 0;
    fresh->rt_ref_cnt = 0;
    auto result = m_var_dict.emplace(var, std::move(fresh));
    return *result.first->second;
}
/*!
 * \brief Look up the record previously stored for \p opr.
 *
 * \param opr operator used as the key into m_opr_dict
 * \return reference to the stored OprInfo
 * \throws std::out_of_range (from unordered_map::at) if \p opr has no entry
 */
ProfilerPlugin::OprInfo& ProfilerPlugin::get_opr_info(cg::OperatorNodeBase *opr) {
return m_opr_dict.at(opr);
}
/*!
 * \brief Look up the record previously stored for \p var.
 *
 * \param var variable node used as the key into m_var_dict
 * \return reference to the VarInfo owned by the table entry
 * \throws std::out_of_range (from unordered_map::at) if \p var has no entry
 */
ProfilerPlugin::VarInfo& ProfilerPlugin::get_var_info(cg::VarNode *var) {
return *m_var_dict.at(var);
}
}
/**
* \file imperative/src/include/megbrain/imperative/profiler_plugin.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megbrain/plugin/base.h"
#include "megbrain/imperative/profiler.h"
namespace mgb::imperative::interpreter::intl {
/*!
 * \brief Computing-graph plugin that forwards graph execution events to the
 *      imperative Profiler.
 *
 * The plugin keeps one OprInfo per operator and one VarInfo per variable of
 * the current computing sequence; the event handlers installed by the
 * constructor use these records to emit profiler events.
 */
class ProfilerPlugin: public PluginBase {
public:
    //! Bookkeeping record for a single operator.
    struct OprInfo {
        //! profiler id assigned when the operator is registered
        uint64_t id;
        //! the comp node the operator runs on
        CompNode comp_node;
        //! string map built from the operator's JSON description
        std::shared_ptr<std::unordered_map<std::string, std::string>> params;
    };

    //! Bookkeeping record for a single variable.
    struct VarInfo {
        //! profiler id assigned when the variable is registered
        uint64_t id;
        //! set for variables first encountered as an operator input
        bool is_const;
        //! static count, incremented each time a known variable is used as input
        size_t ref_cnt;
        //! per-run counter, reset from ref_cnt and decremented as inputs are consumed
        std::atomic_size_t rt_ref_cnt;
    };

    //! Install the profiling event handlers on \p graph.
    explicit ProfilerPlugin(cg::ComputingGraph* graph);

    //! Fill both tables from the operator sequence of \p comp_seq.
    void init_seq(cg::AsyncExecutable* comp_seq);

    //! Create and store the record for \p opr.
    OprInfo& register_opr(cg::OperatorNodeBase* opr);

    //! Create and store the record for \p var.
    VarInfo& register_var(cg::VarNode* var);

    //! Fetch the stored record for \p opr.
    OprInfo& get_opr_info(cg::OperatorNodeBase* opr);

    //! Fetch the stored record for \p var.
    VarInfo& get_var_info(cg::VarNode* var);

private:
    //! operator -> record
    std::unordered_map<cg::OperatorNodeBase*, OprInfo> m_opr_dict;
    //! variable -> heap-allocated record
    std::unordered_map<cg::VarNode*, std::unique_ptr<VarInfo>> m_var_dict;
};
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册