提交 8c2b916e 编写于 作者: M Megvii Engine Team

refactor(imperative): remove some methods in proxy graph

GitOrigin-RevId: 1fb68a1da290dc0de8976c0537acc708bab07fa3
上级 2348a963
......@@ -121,22 +121,6 @@ private:
};
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder);
class ProxyGraph::ExecEnv final : public cg::GraphExecutable::ExecEnv {
public:
    // Minimal synchronous ExecEnv for imperative mode: every dispatched task
    // is run inline on the calling thread instead of being queued per
    // comp node.
    void dispatch_on_comp_node(CompNode, Task&& task) override { task(); }
    void dispatch_on_comp_node_with_mask(
            CompNode, Task&& task, cg::ExecutionMask* mask) override {
        // ExecutionMask implements conditional execution in graph mode and
        // has no counterpart here; reject any non-null mask
        mgb_throw_if(
                mask, GraphError, "ExecutionMask not supported in imperative mode");
        task();
    }
    // tasks run eagerly and synchronously, so there is nothing to pause/resume
    void pause_exec() override {}
    void resume_exec() override {}
};
class ProxyGraph::StaticInferManager : public cg::static_infer::StaticInferManager {
public:
using Tag = cg::static_infer::Tag;
......@@ -183,26 +167,8 @@ public:
}
InferType get_infer_type(Tag var) override {
    // May be called during get_proxy_opr or make_backward_graph.
    // Report MISSING_INP for both shape and value unconditionally so the
    // opr cannot apply any immediate (shape/value based) optimization
    // while being built on the proxy graph.
    //
    // NOTE: the previous per-var logic (InputPlaceholder / cur_opr output
    // inspection) was unreachable dead code after this early return and
    // has been removed.
    return {InferType::MISSING_INP, InferType::MISSING_INP};
}
void update() {
......@@ -471,7 +437,6 @@ std::atomic<size_t> ProxyGraph::ProxyGraphImpl::m_node_id = 0;
// Construct the proxy graph together with the helper singletons it owns;
// each helper holds a back-pointer (or is scoped) to this ProxyGraph.
ProxyGraph::ProxyGraph()
        : m_graph(ProxyGraphImpl::make(this)),
          m_env{new ExecEnv},
          m_static_infer_manager(new StaticInferManager(this)),
          m_seq_comp_node_optimizer(new SeqCompNodeOptimizer()) {}
......@@ -506,32 +471,6 @@ private:
/*********************** Physical Tensor Impl ***********************/
SmallVector<LogicalTensorDesc> ProxyGraph::infer_output_attrs(
        const OpDef& opdef, const SmallVector<Tensor*>& inputs) {
    // Expand \p opdef into a proxy opr over \p inputs and run static shape
    // inference, returning {layout, comp_node} descriptors for every usable
    // output var.
    SmallVector<LogicalTensorDesc> descs;
    CUR_OPR_GUARD(get_proxy_opr(opdef, inputs));
    ::mgb::opr::intl::WorkspaceLimitHook::set_impl(
            m_graph.get(), ProxyGraph::get_workspace_limit);
    do_shape_infer(true);
    for (auto&& out_var : m_cur_opr->usable_output()) {
        // dtype/comp_node must always be inferable; shape may legitimately be
        // unknown only for vars that bypass system memory allocation
        mgb_assert(out_var->dtype().valid() && out_var->comp_node().valid());
        mgb_assert(
                out_var->shape().ndim ||
                out_var->contain_flag(VarNode::Flag::NO_SYS_MEM_ALLOC));
        descs.push_back({{out_var->shape(), out_var->dtype()}, out_var->comp_node()});
    }
    return descs;
}
void ProxyGraph::invoke_op(
        const OpDef& opdef, const SmallVector<Tensor*>& inputs,
        const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspaces) {
    // Execute \p opdef once: build a proxy opr, bind the caller-provided
    // output/workspace tensors to its output vars, then run it through the
    // synchronous ExecEnv.
    CUR_OPR_GUARD(get_proxy_opr(opdef, inputs));
    init_output_tensor(outputs, workspaces);
    // record every comp node the outputs live on so the graph can track usage
    for (auto out_var : m_cur_opr->output()) {
        m_graph->add_used_comp_node(out_var->comp_node());
    }
    m_cur_opr->execute(*m_env);
}
void ProxyGraph::cleanup() {
if (m_cur_opr) {
for (auto&& i : m_cur_opr->input()) {
......@@ -545,102 +484,8 @@ void ProxyGraph::cleanup() {
m_cur_opr = nullptr;
}
// Bind caller-provided output/workspace tensors to the output vars of the
// current proxy opr (allocating workspaces internally when none are given),
// then trigger the memory-status callback some oprs rely on.
// Preconditions checked by assertions: each provided tensor's comp node,
// shape and dtype must match the corresponding output var, and every
// provided tensor must be consumed exactly once.
void ProxyGraph::init_output_tensor(
        const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspaces) {
    // get proxy opr
    auto proxy = m_cur_opr;
    // report the combined byte size of the provided workspaces as the
    // workspace limit, so shape inference sizes workspace vars to exactly
    // what the caller passed in
    auto get_workspace_size = [=](CompNode cn, size_t old_limit) {
        size_t limit = 0;
        for (auto&& var : workspaces) {
            limit += var->dtype().size(var->shape().total_nr_elems());
        }
        return limit;
    };
    ::mgb::opr::intl::WorkspaceLimitHook::set_impl(m_graph.get(), get_workspace_size);
    do_shape_infer(true);
    size_t j = 0;  // index of next caller-provided output to bind
    size_t k = 0;  // index of next caller-provided workspace to bind
    for (auto&& var : proxy->output()) {
        auto&& chk = var->m_mem_plan.reset_from_owner_var().chunk();
        if (var->contain_flag(VarNode::Flag::VOLATILE_CONTENT)) {
            // workspace
            if (workspaces.size()) {
                mgb_assert(k < workspaces.size());
                auto&& layout = workspaces[k]->layout();
                mgb_assert(
                        var->comp_node() == workspaces[k]->comp_node() &&
                        var->shape().eq_shape(layout) && var->dtype() == layout.dtype);
                var->m_dev_tensor = workspaces[k]->dev_tensor();
                ++k;
            } else {
                // no workspaces supplied: allocate one ourselves via the
                // blob manager (with defragmentation on allocation failure)
                TensorLayout layout{var->shape(), var->dtype(), var->format()};
                var->m_dev_tensor = BlobManager::inst()->alloc_workspace_with_defrag(
                        var->comp_node(), layout);
            }
        } else {
            // ordinary output: forward the caller's device tensor into the var
            mgb_assert(j < outputs.size());
            auto&& tensor = outputs[j];
            auto&& layout = tensor->layout();
            mgb_assert(
                    var->comp_node() == tensor->comp_node() &&
                    var->shape().eq_shape(layout) && var->dtype() == layout.dtype);
            var->assign_dev_tensor_from_tensor(tensor->dev_tensor());
            ++j;
        }
        chk.mem_alloc_status.set_from_owner_var();
    }
    // every provided tensor must have been consumed exactly once
    mgb_assert(j == outputs.size());
    mgb_assert(k == workspaces.size());
    // Memory forwarding was bypassed in megbrain with graph option
    // imperative_proxy_graph on, here we call mem_plan_fwd_in2out_readonly
    // to initialize some opr(e.g. Subtensor)'s internal state
    // TODO: implement memory forwarding
    proxy->mem_plan_fwd_in2out_readonly();
    {
        // some opr (e.g. Reduce) rely on on_mem_status_changed to set
        // input/output tensor correctly, since we bypass var_node_mem_mgr
        // on_mem_status_changed should be called here
        auto&& cb = proxy->get_opr_event_callback().on_mem_status_changed;
        if (cb.valid()) {
            cb.val()();
        }
    }
}
cg::OperatorNodeBase* ProxyGraph::get_proxy_opr(
        const OpDef& opdef, const SmallVector<Tensor*>& inputs) {
    // Wrap each physical input tensor in an InputPlaceholder var, then expand
    // \p opdef into the proxy graph on top of those placeholders and return
    // the resulting operator node.
    VarNodeArray placeholder_vars(inputs.size());
    for (size_t idx = 0; idx < inputs.size(); ++idx) {
        placeholder_vars[idx] = InputPlaceholder::make(*m_graph, *inputs[idx]).node();
    }
    auto* opr = OpDef::apply_on_var_node(opdef, placeholder_vars)[0]->owner_opr();
    // the expansion must yield a single opr consuming the placeholders
    // directly; anything else breaks the single-opr proxy assumption
    mgb_assert(!opr->same_type<InputPlaceholder>());
    for (auto&& in_var : opr->input()) {
        mgb_assert(in_var->owner_opr()->same_type<InputPlaceholder>());
    }
    return opr;
}
/*********************** Logical Tensor Impl ***********************/
std::tuple<SmallVector<LogicalTensorDesc>, bool> ProxyGraph::
        infer_output_attrs_fallible(
                const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs) {
    // this function is just a placeholder; the real implementation is
    // ProxyGraphTypeI::infer_output_attrs_fallible in minigraph, so reaching
    // this body indicates a dispatch bug (hence the unconditional assert)
    mgb_assert(0);
}
// Var nodes cached while building a backward graph (used by
// make_backward_graph). NOTE(review): member roles inferred from names —
// inputs/outputs of the forward op, incoming output gradients, and the
// resulting gradient var; confirm against make_backward_graph's body.
struct ProxyGraph::GradGraph {
    cg::VarNodeArray inputs;
    cg::VarNodeArray outputs;
    cg::VarNodeArray output_grads;
    cg::VarNode* grad;
};
EncodedSubgraph ProxyGraph::make_backward_graph(
const OpDef& opdef, const SmallVector<LogicalTensorDesc>& input_descs,
const SmallVector<bool>& input_requires_grad,
......@@ -793,22 +638,6 @@ VarNodeArray ProxyGraph::make_input_place_holders(
/*********************** Common Impl ***********************/
// Refresh inferred shapes on every output var of the current opr.
// \param sync_value if true, use infallible inference (asserting success);
//        otherwise use fallible inference and report failures via the
//        return value.
// \return whether every output shape could be inferred
bool ProxyGraph::do_shape_infer(bool sync_value) {
    m_static_infer_manager->update();
    bool all_inferred = true;
    for (auto* out_var : m_cur_opr->output()) {
        if (sync_value) {
            out_var->shape(m_static_infer_manager->infer_shape(out_var));
            continue;
        }
        if (auto* inferred = m_static_infer_manager->infer_shape_fallible(out_var)) {
            out_var->shape(*inferred);
        } else {
            all_inferred = false;
        }
    }
    return all_inferred;
}
TensorPtr ProxyGraph::as_tensor(cg::OperatorNodeBase* opr, bool share) {
// TODO : maybe some tensor should copy value from origin opr rather than
// share the RawStorage
......
......@@ -27,44 +27,22 @@ public:
// Fetch-and-clear the pending asynchronous execution error, if any.
// NOTE(review): tm_async_error appears to be thread-local (tm_ prefix
// convention), so this presumably only reports errors raised on the
// calling thread — confirm at its declaration.
static std::unique_ptr<MegBrainError> get_async_error() {
    return std::move(tm_async_error);
}
// Workspace-limit hook: ignore the graph's old limit and allow as much
// workspace as the device can currently serve — the larger of total free
// memory and the biggest single allocatable block.
static size_t get_workspace_limit(CompNode cn, size_t old_limit) {
    size_t free_mem = cn.get_free_mem();
    size_t max_block = cn.get_max_block_size_available();
    return std::max(max_block, free_mem);
}
/********************** Physical Tensor API **********************/
SmallVector<LogicalTensorDesc> infer_output_attrs(
const OpDef& opdef, const SmallVector<Tensor*>& inputs);
void invoke_op(
const OpDef& opdef, const SmallVector<Tensor*>& inputs,
const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspace);
EncodedSubgraph make_backward_graph(
const OpDef& opdef, const SmallVector<LogicalTensorDesc>& input_descs,
const SmallVector<bool>& input_requires_grad,
const SmallVector<bool>& output_has_grad);
/********************** Logical Tensor API **********************/
size_t get_opr_output_size(
const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs);
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& opdef, const SmallVector<LogicalTensorDesc>& inputs);
private:
ProxyGraph();
class ProxyGraphImpl;
class ExecEnv;
class StaticInferManager;
class SeqCompNodeOptimizer;
class InputPlaceholder;
struct ProxyGraphInst;
struct GradGraph;
class CurOprGuard;
void reset();
......@@ -73,12 +51,6 @@ private:
void cleanup();
void init_output_tensor(
const SmallVector<Tensor*>& outputs, const SmallVector<Tensor*>& workspace);
cg::OperatorNodeBase* get_proxy_opr(
const OpDef& opdef, const SmallVector<Tensor*>& inputs);
/********************** Logical Tensor Helper **********************/
cg::VarNodeArray make_input_place_holders(
......@@ -86,14 +58,11 @@ private:
/********************** Common Helper **********************/
bool do_shape_infer(bool sync_value);
TensorPtr as_tensor(cg::OperatorNodeBase* opr, bool share = true);
cg::OperatorNodeBase* m_cur_opr = nullptr;
std::unique_ptr<ProxyGraphImpl> m_graph;
size_t m_max_op_cnt = 100;
std::unique_ptr<ExecEnv> m_env;
std::unique_ptr<StaticInferManager> m_static_infer_manager;
std::unique_ptr<SeqCompNodeOptimizer> m_seq_comp_node_optimizer;
......
......@@ -801,18 +801,19 @@ public:
return ret;
}
SmallVector<LogicalTensorDesc> infer_output_attrs(
const OpDef& def, const SmallVector<Tensor*>& inputs) {
SmallVector<LogicalTensorDesc> descs;
auto& minigraph = get_cached_minigraph(def, inputs);
SmallVector<TensorPtr> apply_on_physical_tensor(
const OpDef& def, SmallVector<TensorPtr> inputs) {
auto raw_inputs = to_raw_ptr_array(inputs);
auto& minigraph = get_cached_minigraph(def, raw_inputs);
auto _ = scoped_attach(&minigraph);
auto sess = minigraph.infer_session(inputs);
auto sess = minigraph.infer_session(raw_inputs);
::mgb::opr::intl::WorkspaceLimitHook::set_impl(
minigraph.opr()->owner_graph(), get_workspace_limit);
// some output var in minigraph.opr()->output() may not appears in
// minigraph.opr()->usable_output() bug execution may use the attrs for those
// output var, so we infer attrs for all outputs, but only return
// LogicalTensorDesc for minigraph.opr()->usable_output()
::mgb::opr::intl::WorkspaceLimitHook::set_impl(
minigraph.opr()->owner_graph(), get_workspace_limit);
SmallVector<LogicalTensorDesc> output_descs;
for (size_t i = 0; i < minigraph.opr()->output().size(); ++i) {
auto* shape = sess.infer(sess.output_data[i].shape_infer, true);
mgb_assert(shape);
......@@ -825,15 +826,9 @@ public:
mgb_assert(
ovar->shape().ndim ||
ovar->contain_flag(VarNode::Flag::NO_SYS_MEM_ALLOC));
descs.push_back({{ovar->shape(), ovar->dtype()}, ovar->comp_node()});
output_descs.push_back({{ovar->shape(), ovar->dtype()}, ovar->comp_node()});
}
return descs;
}
SmallVector<TensorPtr> apply_on_physical_tensor(
const OpDef& def, SmallVector<TensorPtr> inputs) {
auto raw_inputs = to_raw_ptr_array(inputs);
auto output_descs = infer_output_attrs(def, raw_inputs);
SmallVector<TensorPtr> outputs(output_descs.size(), {});
for (size_t i = 0; i < outputs.size(); i++) {
outputs[i] =
......@@ -853,11 +848,8 @@ public:
}
}
}
auto& minigraph = get_cached_minigraph(def, raw_inputs);
auto _ = scoped_attach(&minigraph);
// some opr (e.g. Subtensor) may invoke infer_value during execution,
// so we need create inference session here
auto sess = minigraph.infer_session(raw_inputs);
minigraph.execute(raw_inputs, raw_outputs, m_env);
for (auto&& cn : used_cns) {
for (auto&& in : inputs) {
......
......@@ -10,11 +10,6 @@
*/
#include "./mini_graph.h"
#if 0
// ../proxy_graph.h is deprecated, leave here for debug purpose
// uncomment #if 0 macro to debug
#include "../proxy_graph.h"
#endif
namespace mgb::imperative::proxy_graph {
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ProxyGraph::InputPlaceholder);
......@@ -28,18 +23,6 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
auto ret = proxy_graph::ProxyGraphTypeI::inst().infer_output_attrs_fallible(
def, inputs);
#if 0
// delete me after the new implementation is stable
auto ref = ProxyGraph::get_default_graph()->infer_output_attrs_fallible(def, inputs);
auto& [a, _1] = ret;
auto& [b, _2] = ref;
if (a.size() != b.size()) mgb_trap();
for (size_t i = 0; i < a.size(); ++i) {
if (a[i].layout.dtype != b[i].layout.dtype) mgb_trap();
if (a[i].comp_node != b[i].comp_node) mgb_trap();
if (!a[i].layout.eq_shape(b[i].layout)) mgb_trap();
}
#endif
return ret;
}
......
......@@ -17,83 +17,6 @@ namespace mgb {
namespace imperative {
namespace proxy_graph_detail {
// those functions are reimplemented with opr cache
// in ./proxy_graph/mini_graph.h
#if 0
namespace {
// Borrow raw Tensor pointers from owning TensorPtrs; the caller must keep
// \p inputs alive for as long as the returned pointers are used.
// \param ensure_storage if true, touch each tensor's blob storage so lazy
//        allocation happens up front rather than during execution
SmallVector<Tensor*> to_raw_ptr_array(
        const SmallVector<TensorPtr>& inputs, bool ensure_storage = true) {
    SmallVector<Tensor*> ret;
    for (auto&& i : inputs) {
        mgb_assert(i);
        ret.push_back(i.get());
        if (ensure_storage) {
            // apply lazy allocation
            i->blob()->storage();
        }
    }
    return ret;
}
// Thin adapter: forward to the default ProxyGraph's infer_output_attrs,
// converting owning TensorPtrs into borrowed raw pointers first.
SmallVector<LogicalTensorDesc> infer_output_attrs(
        const OpDef& def, const SmallVector<TensorPtr>& inputs) {
    auto&& graph = ProxyGraph::get_default_graph();
    return graph->infer_output_attrs(def, to_raw_ptr_array(inputs));
}
} // anonymous namespace
// Execute \p def on \p inputs, writing into pre-allocated \p outputs and
// \p workspaces, with cross-comp-node synchronization: before execution each
// output comp node waits on events from inputs living on other comp nodes;
// after execution release callbacks are registered so those inputs stay
// alive until the consuming comp node is done with them.
void exec(
        const OpDef& def, const SmallVector<TensorPtr>& inputs,
        const SmallVector<TensorPtr>& outputs,
        const SmallVector<TensorPtr>& workspaces) {
    auto&& graph = ProxyGraph::get_default_graph();
    auto raw_inputs = to_raw_ptr_array(inputs), raw_outputs = to_raw_ptr_array(outputs),
         raw_workspaces = to_raw_ptr_array(workspaces);
    CompNode::UnorderedSet used_cns;
    for (auto&& out : raw_outputs) {
        auto cn = out->comp_node();
        if (used_cns.insert(cn).second) {
            // first time this comp node is seen: make it wait for every
            // input that lives on a different comp node
            for (auto&& in : inputs) {
                if (in->comp_node() != cn) {
                    auto&& e = in->get_or_create_event();
                    e->device_wait_by(cn);
                }
            }
        }
    }
    graph->invoke_op(def, raw_inputs, raw_outputs, raw_workspaces);
    // keep cross-comp-node inputs alive until each consuming comp node
    // has finished with them
    for (auto&& cn : used_cns) {
        for (auto&& in : inputs) {
            if (in->comp_node() != cn) {
                in->add_release_callback(cn);
            }
        }
    }
}
// Legacy eager-apply path: infer output descriptors, allocate one tensor per
// descriptor, execute the op (with no external workspaces), then surface any
// asynchronous error raised during execution.
SmallVector<TensorPtr> apply_on_physical_tensor(
        const OpDef& def, SmallVector<TensorPtr> inputs) {
    auto output_descs = infer_output_attrs(def, inputs);
    SmallVector<TensorPtr> outputs(output_descs.size(), {});
    for (size_t i = 0; i < outputs.size(); i++) {
        outputs[i] = Tensor::make(output_descs[i].layout, output_descs[i].comp_node);
    }
    exec(def, inputs, outputs, {});
    // re-raise an async error recorded by the execution environment, if any
    auto async_error = ProxyGraph::get_async_error();
    if (async_error) {
        throw *async_error;
    }
    return outputs;
}
// Legacy fallible-inference path: delegate directly to the default
// ProxyGraph instance.
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(const OpDef& def,
        const SmallVector<LogicalTensorDesc>& inputs) {
    auto&& graph = ProxyGraph::get_default_graph();
    return graph->infer_output_attrs_fallible(def, inputs);
}
#endif
EncodedSubgraph make_backward_graph(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs,
const SmallVector<bool>& input_requires_grad,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册