Commit 4f0e6eae authored by Megvii Engine Team

fix(mge/trace): re-open gopt level in trace

GitOrigin-RevId: 5ebc712690eb65d2c6e4fe08b6353cd403f18cea
Parent 8494a152
@@ -131,7 +131,7 @@ class trace:
     :param sublinear_memory_config: configuration for sublinear memory optimization.
         If not None, it enables sublinear memory optimization with given setting.
     :param profiling: whether to profile compiled trace. Default: False
-    :param opt_level: optimization level for compiling trace.
+    :param opt_level: optimization level for compiling trace. Default: 2
     :param symbolic_shape: whether to use symbolic shape for tracing. Default: True
     """
@@ -147,7 +147,7 @@ class trace:
         capture_as_const=False,
         sublinear_memory_config: SublinearMemoryConfig = None,
         profiling: bool = False,
-        opt_level: int = None,
+        opt_level: int = 2,
         symbolic_shape: bool = True,
     ):
         self.__wrapped__ = function
@@ -377,11 +377,7 @@ class trace:
         )
         readers = [G.OutputNode(x()._varnode).outputs[0] for x in lazy_eval_tensors]
         self._apply_graph_options(lazy_eval_graph)
-        # FIXME
-        if self._graph_opt_level is not None:
-            lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
-        else:
-            lazy_eval_graph.options.graph_opt_level = 2
+        lazy_eval_graph.options.graph_opt_level = self._graph_opt_level
         lazy_eval_graph._set_priority_to_id([*lazy_eval_links, *readers])
         lazy_eval_graph.compile(*lazy_eval_links, *readers)
         lazy_eval_graph()
@@ -500,11 +496,7 @@ class trace:
         graph.options.no_force_inplace = True
         graph.options.seq_opt.enable_seq_comp_node_opt = False
         # graph opt level
-        # if self._graph_opt_level is not None:
-        #     graph.options.graph_opt_level = self._graph_opt_level
-        # FIXME
-        graph.options.graph_opt_level = 0
+        graph.options.graph_opt_level = self._graph_opt_level
         # sublinear
         if self._sublinear_memory_config is not None:
             graph.options.enable_sublinear_memory_opt = True
......@@ -634,11 +626,7 @@ class trace:
opnode = info.shape_reader = G.AttrOutputNode(v, *in_out_links)
add_reader(opnode)
# FIXME
if self._graph_opt_level is not None:
graph.options.graph_opt_level = self._graph_opt_level
else:
graph.options.graph_opt_level = 2
graph.options.graph_opt_level = self._graph_opt_level
graph._set_priority_to_id([*readers, *in_out_links, *io_links])
graph.compile(*readers, *in_out_links, *io_links)
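Taken together, the three hunks above delete the FIXME workarounds: the graph optimization level is no longer forced to 0 (or patched back to 2) inside trace, but flows straight from the constructor argument to every compiled graph. A condensed sketch of the resulting plumbing, using names taken from this diff with everything else elided (the exact attribute wiring in __init__ is an assumption, not shown in the hunks):

class trace:
    # condensed sketch, not the real class: only the opt-level plumbing is shown
    def __init__(self, function, opt_level: int = 2, **kwargs):
        self.__wrapped__ = function
        self._graph_opt_level = opt_level  # assumed mapping between the two names in this diff

    def _apply_graph_options(self, graph):
        # every graph compiled by this trace now honors the user-chosen level;
        # the old code forced 0 here and patched 2 back in at the call sites
        graph.options.graph_opt_level = self._graph_opt_level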
......
@@ -113,6 +113,48 @@ def test_exclude_from_trace(trace_mode):
     np.testing.assert_equal(f(x).numpy(), y)
+
+
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse(trace_mode):
+    # explicitly declare opt_level as 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f(a, b):
+        base = 0
+        c = b - a
+        _, idx = F.topk(c, 3)
+        # internally, biased_idx will be the same as idx, since gopt removes the no-op addition
+        biased_idx = base + idx
+        return biased_idx
+
+    a = tensor(np.ones((7, 2)), dtype=np.int32)
+    b = tensor(2 * np.ones((7, 2)), dtype=np.float32)
+
+    for i in range(3):
+        y = f(a, b)
+        y.numpy()
+
+
+@pytest.mark.parametrize("trace_mode", [False, True])
+def test_elemwise_fuse_in_grad(trace_mode):
+    w = Parameter(np.ones([4, 6]), dtype="float32")
+
+    gm = GradManager().attach(w)
+    opt = optim.SGD([w], lr=0.01, momentum=0.9, weight_decay=5e-4)
+
+    # explicitly declare opt_level as 2
+    @trace(symbolic=trace_mode, opt_level=2)
+    def f():
+        with gm:
+            wm = F.sum(w ** 2, axis=1) ** 0.5
+            loss = wm.mean()
+            gm.backward(loss)
+            opt.step().clear_grad()
+        return loss
+
+    for i in range(3):
+        y = f()
+        y.numpy()
+
+
 def test_print_in_trace():
     for symbolic in [False]:  # cannot read value in symbolic mode
@@ -221,7 +263,6 @@ def test_trace_profiler(trace_mode):
     assert out.get("profiler")


-@pytest.mark.skip(reason="force opt_level=0 when building graph")
 def test_goptions():
     @trace(symbolic=True, opt_level=0, capture_as_const=True)
     def f(x):
......@@ -240,7 +281,6 @@ def test_goptions():
np.testing.assert_equal(g(d).numpy().item(), 1.0)
@pytest.mark.skip(reason="force opt_level=0 when building graph")
def test_goptions_log_sum_exp():
@trace(symbolic=True, opt_level=0, capture_as_const=True)
def f(x, y):
......
@@ -27,12 +27,12 @@ std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
     return Elemwise::make(node->param().mode);
 }

-cg::OperatorNodeBase* apply_on_var_node(
+auto apply_on_var_node(
         const OpDef& def,
         const VarNodeArray& inputs) {
     auto&& elemwise_opr = def.cast_final_safe<Elemwise>();
     OperatorNodeConfig config{elemwise_opr.make_name()};
-    return opr::Elemwise::make(inputs, elemwise_opr.mode, config).node()->owner_opr();
+    return opr::Elemwise::make(inputs, elemwise_opr.mode, config);
 }

 std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
......